From df24dba832030963c8eb7d76e10679dd0b990f33 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Mon, 10 Feb 2025 15:15:01 -0800
Subject: [PATCH 01/18] benchdnn: apply memory protection when running the
 library

---
 tests/benchdnn/common.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/benchdnn/common.cpp b/tests/benchdnn/common.cpp
index b7938faeda3..d83491376fd 100644
--- a/tests/benchdnn/common.cpp
+++ b/tests/benchdnn/common.cpp
@@ -239,7 +239,7 @@ static void zfree_protect(void *ptr) {
 
 void *zmalloc(size_t size, size_t align) {
 #ifdef BENCHDNN_MEMORY_CHECK
-    if (has_bench_mode_bit(mode_bit_t::corr)) { return zmalloc_protect(size); }
+    if (has_bench_mode_bit(mode_bit_t::exec)) { return zmalloc_protect(size); }
 #endif
 
     void *ptr;
@@ -264,7 +264,7 @@ void *zmalloc(size_t size, size_t align) {
 void zfree(void *ptr) {
     if (!ptr) return;
 #ifdef BENCHDNN_MEMORY_CHECK
-    if (has_bench_mode_bit(mode_bit_t::corr)) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
         zfree_protect(ptr);
         return;
     }

From 5ceeab9bf982f5d6969d579833f25cad4d7207c7 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 14:30:32 -0800
Subject: [PATCH 02/18] benchdnn: utils: res: styling

---
 tests/benchdnn/dnnl_common.cpp |  9 +++++----
 tests/benchdnn/dnnl_common.hpp |  8 ++++----
 tests/benchdnn/utils/res.hpp   | 33 ++++++++++++---------------------
 3 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index b1cab46ad51..3dbdc9f0c42 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1316,10 +1316,11 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     // Skip the check if the test object won't be executed.
     if (!has_bench_mode_bit(mode_bit_t::exec)) return OK;
 
-    // Skip the check if it has already happened for provided `dir`. Saves from
-    // repreated run when the second test object is created to test the
-    // primitive cache, but allows to verify both objects when a double-run
-    // driver executes fwd-for-bwd first and bwd after.
+    // Skip the check if it has already happened for the passed `dir`.
+    // It saves from a repeated run when the second test object is created to
+    // validate the primitive cache. At the same time it allows to verify both
+    // test objects when a double-run driver executes the fwd-for-bwd object
+    // first and the bwd object after.
     if (need_skip && res->mem_check_dir == dir) return OK;
     res->mem_check_dir = dir;
 
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index b6aa109683e..770588170ac 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -414,10 +414,10 @@ int create_primitive(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &primw,
     if (res->state == SKIPPED) return OK;
 
     // Check memory requirements if only execution happens.
-    // Note: As a graph may contains moare than one operations with identical
-    // `dir`. Since the mem size check for all the operations are necessary,
-    // the check should not be skipped.
-    SAFE(check_mem_size(pdw, res, dir, /*need_skip=*/!is_graph_ref), WARN);
+    // Note: Graph may contain more than one operation with identical `dir`.
+    //   Since the mem size check is necessary for every operation,
+    //   the check is not skipped in that case.
+    SAFE(check_mem_size(pdw, res, dir, /* need_skip = */ !is_graph_ref), WARN);
     if (res->state == SKIPPED) return OK;
 
     TIME_C_PRIM(DNN_SAFE(dnnl_primitive_create(&prim, pdw), WARN));
diff --git a/tests/benchdnn/utils/res.hpp b/tests/benchdnn/utils/res.hpp
index 5189da78b80..a4ea7e8b6a7 100644
--- a/tests/benchdnn/utils/res.hpp
+++ b/tests/benchdnn/utils/res.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,52 +17,43 @@
 #ifndef UTILS_RES_HPP
 #define UTILS_RES_HPP
 
-#include <cstring>
-#include <vector>
-
-#include "oneapi/dnnl/dnnl.h"
 #include "oneapi/dnnl/dnnl_types.h"
 
 #include "utils/timer.hpp"
 
+#include <string>
+#include <vector>
+
 struct check_mem_size_args_t {
 
     check_mem_size_args_t() = default;
     check_mem_size_args_t(const_dnnl_primitive_desc_t pd, bool want_input)
-        : pd(pd)
-        , want_input(want_input)
-        , is_scratchpad(false)
-        , total_size_device(0)
-        , total_size_cpu(0)
-        , scratchpad_size(0) {
-        // initialize the memory size for reference path
-        memset(total_ref_md_size, 0, sizeof(total_ref_md_size));
-    }
+        : pd(pd), want_input(want_input) {}
 
     // Input args.
-    const_dnnl_primitive_desc_t pd;
-    bool want_input;
-    bool is_scratchpad;
+    const_dnnl_primitive_desc_t pd = nullptr;
+    bool want_input = false;
+    bool is_scratchpad = false;
 
     // Output args:
     // `sizes` used to validate OpenCL memory requirements.
     std::vector<size_t> sizes;
     // `total_size_device` specifies memory allocated on device for a test obj.
-    size_t total_size_device;
+    size_t total_size_device = 0;
     // `total_size_cpu` specifies:
     // * Memory allocated for reference ocmputations (`C` mode only).
     // * Memory allocated for comparison results (`C` mode only).
     // * Memory allocated for mapping device memory (GPU backend only).
     // * Memory allocated on CPU for a test obj (CPU backend only).
-    size_t total_size_cpu;
+    size_t total_size_cpu = 0;
     // `total_ref_md_size` specifies the additional tag::abx f32 memory
     // required for correctness check.
     // * The first element refers to the total memory for input reference
     // * The second element refers to the total memory for output reference
     // The args are used in memory estimation for graph driver only.
-    size_t total_ref_md_size[2];
+    size_t total_ref_md_size[2] = {0, 0};
     // `scratchpad_size` specifies a scratchpad size for specific checks.
-    size_t scratchpad_size;
+    size_t scratchpad_size = 0;
 };
 
 /* result structure */

From 28a3c940b0093578632b01ecf3fddbb6f9ae7bf6 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:08:29 -0800
Subject: [PATCH 03/18] benchdnn: mem_check: remove an argument present in
 res_t

---
 tests/benchdnn/dnnl_common.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 3dbdc9f0c42..590bd49477a 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1082,8 +1082,7 @@ int get_gpu_cache_size(size_t &cache_size) {
     return OK;
 }
 
-static int check_total_size(
-        const check_mem_size_args_t &check_mem_size_args, res_t *res) {
+static int check_total_size(res_t *res) {
     static size_t cpu_device_capacity = get_cpu_ram_size();
     static size_t gpu_device_capacity = 0;
     static size_t gpu_max_alloc_capacity = 0;
@@ -1104,6 +1103,8 @@ static int check_total_size(
         return (res->mem_check_dir & FLAG_FWD) ? "FWD" : "BWD";
     };
 
+    const check_mem_size_args_t &check_mem_size_args = res->mem_size_args;
+
     if (is_gpu()) {
         const bool fits_device_ram = check_mem_size_args.total_size_device
                 <= benchdnn_device_limit;
@@ -1300,12 +1301,10 @@ void get_memory_bytes(check_mem_size_args_t &check_mem_size_args) {
 int check_mem_size(const_dnnl_memory_desc_t md, res_t *res) {
     if (!mem_check) return OK;
 
-    check_mem_size_args_t check_mem_size_args(nullptr, false);
     const auto md_size = dnnl_memory_desc_get_size(md);
-    check_mem_size_args.total_size_device = md_size;
-    check_mem_size_args.sizes.push_back(md_size);
-
-    return check_total_size(check_mem_size_args, res);
+    res->mem_size_args.total_size_device = md_size;
+    res->mem_size_args.sizes.push_back(md_size);
+    return check_total_size(res);
 }
 
 int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
@@ -1348,10 +1347,11 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     check_mem_size_args.want_input = false;
     get_memory_bytes(check_mem_size_args);
 
-    // Save the mem size args for graph driver check.
+    // Copy memory stats. It's required to accumulate them before performing
+    // the check.
     res->mem_size_args = check_mem_size_args;
 
-    return check_total_size(check_mem_size_args, res);
+    return check_total_size(res);
 }
 
 int get_memory_footprint(const_dnnl_primitive_desc_t const_pd, res_t *res) {

From e7963e79e17046a512f8bf77981019e5ba58ebd4 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:08:57 -0800
Subject: [PATCH 04/18] benchdnn: mem_check: report bytes smarter

---
 tests/benchdnn/dnnl_common.cpp | 52 +++++++++++++++++++++++++---------
 tests/benchdnn/dnnl_common.hpp |  1 +
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 590bd49477a..32773c854a4 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1082,6 +1082,29 @@ int get_gpu_cache_size(size_t &cache_size) {
     return OK;
 }
 
+std::string smart_bytes(double bytes) {
+    std::string s;
+    static constexpr int oneK = 1024;
+
+    if (bytes < oneK) {
+        s = std::to_string(static_cast<size_t>(bytes)) + " B";
+        return s;
+    }
+    auto KB = bytes / oneK;
+    if (KB < oneK) {
+        s = std::to_string(KB) + " KB";
+        return s;
+    }
+    auto MB = KB / oneK;
+    if (MB < oneK) {
+        s = std::to_string(MB) + " MB";
+        return s;
+    }
+    auto GB = MB / oneK;
+    s = std::to_string(GB) + " GB";
+    return s;
+}
+
 static int check_total_size(res_t *res) {
     static size_t cpu_device_capacity = get_cpu_ram_size();
     static size_t gpu_device_capacity = 0;
@@ -1098,7 +1121,6 @@ static int check_total_size(res_t *res) {
     const double benchdnn_cpu_limit = capacity_factor * cpu_max_capacity;
     assert(benchdnn_device_limit > 0 && benchdnn_cpu_limit > 0);
 
-    auto GB = [](double bytes) { return bytes / powf(2, 30); };
     auto dir_c_str = [&res]() {
         return (res->mem_check_dir & FLAG_FWD) ? "FWD" : "BWD";
     };
@@ -1122,9 +1144,10 @@ static int check_total_size(res_t *res) {
                     const bool fit = s < gpu_max_alloc_capacity;
                     if (!fit) {
                         BENCHDNN_PRINT(2,
-                                "[CHECK_MEM][%s]: Allocation of size %g GB "
-                                "doesn't fit allocation limit of %g GB.\n",
-                                dir_c_str(), GB(s), GB(gpu_max_alloc_capacity));
+                                "[CHECK_MEM][%s]: Allocation of size %s "
+                                "doesn't fit allocation limit of %s.\n",
+                                dir_c_str(), smart_bytes(s).c_str(),
+                                smart_bytes(gpu_max_alloc_capacity).c_str());
                     }
                     return fit;
                 });
@@ -1134,11 +1157,13 @@ static int check_total_size(res_t *res) {
         }
 
         BENCHDNN_PRINT((!fits_device_ram ? 2 : 6),
-                "[CHECK_MEM][%s]: Requested: %g GB; benchdnn_device_limit: %g "
-                "GB; device_RAM_capacity: %g GB; gpu_max_alloc: %g GB;\n",
-                dir_c_str(), GB(check_mem_size_args.total_size_device),
-                GB(benchdnn_device_limit), GB(gpu_device_capacity),
-                GB(gpu_max_alloc_capacity));
+                "[CHECK_MEM][%s]: Requested: %s; benchdnn_device_limit: %s; "
+                "device_RAM_capacity: %s; gpu_max_alloc: %s;\n",
+                dir_c_str(),
+                smart_bytes(check_mem_size_args.total_size_device).c_str(),
+                smart_bytes(benchdnn_device_limit).c_str(),
+                smart_bytes(gpu_device_capacity).c_str(),
+                smart_bytes(gpu_max_alloc_capacity).c_str());
     }
 
     size_t total_size_cpu = check_mem_size_args.total_size_cpu;
@@ -1177,10 +1202,11 @@ static int check_total_size(res_t *res) {
     }
 
     BENCHDNN_PRINT((!fits_cpu_ram ? 2 : 6),
-            "[CHECK_MEM][%s]: Requested: %g GB; benchdnn_CPU_limit: %g GB; "
-            "CPU_RAM_capacity: %g GB;\n",
-            dir_c_str(), GB(total_size_cpu), GB(benchdnn_cpu_limit),
-            GB(cpu_device_capacity));
+            "[CHECK_MEM][%s]: Requested: %s; benchdnn_CPU_limit: %s; "
+            "CPU_RAM_capacity: %s;\n",
+            dir_c_str(), smart_bytes(total_size_cpu).c_str(),
+            smart_bytes(benchdnn_cpu_limit).c_str(),
+            smart_bytes(cpu_device_capacity).c_str());
 
     return res->state == FAILED ? FAIL : OK;
 }
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index 770588170ac..10df7e94582 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -235,6 +235,7 @@ int get_gpu_ram_sizes(size_t &ram_size, size_t &max_alloc_size);
 int get_cpu_cache_size(cpu_cache_args_t &cache_args);
 int get_gpu_cache_size(size_t &cache_size);
 
+std::string smart_bytes(double bytes);
 bool is_fwd_training(dnnl_prop_kind_t prop_kind);
 bool is_fwd_prop_kind(dnnl_prop_kind_t prop_kind);
 int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);

From 921ff3390abffdd7d2b2cdf3db7c716d631d804f Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:10:40 -0800
Subject: [PATCH 05/18] benchdnn: mem_check: change verbose level for reporting
 to 1

---
 tests/benchdnn/dnnl_common.cpp      | 12 ++++++------
 tests/benchdnn/doc/knobs_verbose.md |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 32773c854a4..a7c94d4539e 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1131,7 +1131,7 @@ static int check_total_size(res_t *res) {
         const bool fits_device_ram = check_mem_size_args.total_size_device
                 <= benchdnn_device_limit;
         if (!fits_device_ram) {
-            BENCHDNN_PRINT(2,
+            BENCHDNN_PRINT(1,
                     "[CHECK_MEM][%s]: Not enough device RAM for a problem.\n",
                     dir_c_str());
             res->state = SKIPPED;
@@ -1143,7 +1143,7 @@ static int check_total_size(res_t *res) {
                 check_mem_size_args.sizes.cend(), [&](size_t s) {
                     const bool fit = s < gpu_max_alloc_capacity;
                     if (!fit) {
-                        BENCHDNN_PRINT(2,
+                        BENCHDNN_PRINT(1,
                                 "[CHECK_MEM][%s]: Allocation of size %s "
                                 "doesn't fit allocation limit of %s.\n",
                                 dir_c_str(), smart_bytes(s).c_str(),
@@ -1156,7 +1156,7 @@ static int check_total_size(res_t *res) {
             res->reason = skip_reason::not_enough_ram;
         }
 
-        BENCHDNN_PRINT((!fits_device_ram ? 2 : 6),
+        BENCHDNN_PRINT((!fits_device_ram ? 1 : 6),
                 "[CHECK_MEM][%s]: Requested: %s; benchdnn_device_limit: %s; "
                 "device_RAM_capacity: %s; gpu_max_alloc: %s;\n",
                 dir_c_str(),
@@ -1174,7 +1174,7 @@ static int check_total_size(res_t *res) {
     bool fits_cpu_ram = total_size_cpu <= benchdnn_cpu_limit;
 
     if (!fits_cpu_ram) {
-        BENCHDNN_PRINT(2,
+        BENCHDNN_PRINT(1,
                 "[CHECK_MEM][%s]: Not enough CPU RAM for a problem.\n",
                 dir_c_str());
         // Try to catch a huge scratchpad size requested by the library.
@@ -1188,7 +1188,7 @@ static int check_total_size(res_t *res) {
         if (is_cpu()
                 && check_mem_size_args.scratchpad_size
                         > scratch_trh * check_mem_size_args.total_size_device) {
-            BENCHDNN_PRINT(2,
+            BENCHDNN_PRINT(1,
                     "[CHECK_MEM][%s]: CPU scratchpad size `%zu` exceeded a "
                     "given threshold `%zu`.\n",
                     dir_c_str(), check_mem_size_args.scratchpad_size,
@@ -1201,7 +1201,7 @@ static int check_total_size(res_t *res) {
         res->reason = skip_reason::not_enough_ram;
     }
 
-    BENCHDNN_PRINT((!fits_cpu_ram ? 2 : 6),
+    BENCHDNN_PRINT((!fits_cpu_ram ? 1 : 6),
             "[CHECK_MEM][%s]: Requested: %s; benchdnn_CPU_limit: %s; "
             "CPU_RAM_capacity: %s;\n",
             dir_c_str(), smart_bytes(total_size_cpu).c_str(),
diff --git a/tests/benchdnn/doc/knobs_verbose.md b/tests/benchdnn/doc/knobs_verbose.md
index 0f2b4b8d880..2a229d8dfc5 100644
--- a/tests/benchdnn/doc/knobs_verbose.md
+++ b/tests/benchdnn/doc/knobs_verbose.md
@@ -16,6 +16,8 @@ following information is printed for certain verbosity levels.
 ## Level 1
 * Problem reproducer line right after the problem was constructed. It is
   convenient to catch the repro line in case of a program crash.
+* The problem memory footprint and RAM capacity on devices in cases when the
+  limit is reached and the problem will be skipped.
 
 ## Level 2
 * Various warnings.
@@ -29,7 +31,7 @@ following information is printed for certain verbosity levels.
 * The library implementation name picked to compute the given problem.
 
 ## Level 6
-* The problem memory footprint and RAM capacity on devices.
+* The problem memory footprint and RAM capacity on devices, unconditionally.
 * Fill configuration stats.
 * Compare configuration stats.
 * Additional implementation filtering information.

From 7938c77cf8402eafc1413c6bc31604b2444312f6 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:11:01 -0800
Subject: [PATCH 06/18] benchdnn: mem_check: add sizes and scratchpad reporting

---
 tests/benchdnn/dnnl_common.cpp | 43 +++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index a7c94d4539e..31192efe02b 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1113,12 +1113,11 @@ static int check_total_size(res_t *res) {
 
     const size_t device_max_capacity
             = is_cpu() ? cpu_device_capacity : gpu_device_capacity;
-    const size_t cpu_max_capacity = cpu_device_capacity;
 
     // 0.75f is taken randomly and is subject to change in future.
     const double capacity_factor = 0.75;
     const double benchdnn_device_limit = capacity_factor * device_max_capacity;
-    const double benchdnn_cpu_limit = capacity_factor * cpu_max_capacity;
+    const double benchdnn_cpu_limit = capacity_factor * cpu_device_capacity;
     assert(benchdnn_device_limit > 0 && benchdnn_cpu_limit > 0);
 
     auto dir_c_str = [&res]() {
@@ -1166,12 +1165,18 @@ static int check_total_size(res_t *res) {
                 smart_bytes(gpu_max_alloc_capacity).c_str());
     }
 
-    size_t total_size_cpu = check_mem_size_args.total_size_cpu;
+    // If the problem runs on CPU, the combined memory represents requirements
+    // for the library and for the reference paths.
+    // If the problem runs on a device, the combined memory represents potential
+    // requirement for integrated devices that use CPU pool for both memories.
+    // The second case has a higher limit. TODO: document the reason.
+    size_t cpu_and_device_size = check_mem_size_args.total_size_cpu
+            + check_mem_size_args.total_size_device;
+    bool fits_cpu_ram = cpu_and_device_size
+            <= (is_cpu() ? benchdnn_cpu_limit : cpu_device_capacity);
 
-    // Add device size as a simple method to account for integrated devices and
-    // mapping/unmapping memory
-    total_size_cpu += check_mem_size_args.total_size_device;
-    bool fits_cpu_ram = total_size_cpu <= benchdnn_cpu_limit;
+    // Check combined size against CPU capacity as the simpler method to account
+    // for integrated devices and mapping/unmapping memory.
 
     if (!fits_cpu_ram) {
         BENCHDNN_PRINT(1,
@@ -1202,12 +1207,28 @@ static int check_total_size(res_t *res) {
     }
 
     BENCHDNN_PRINT((!fits_cpu_ram ? 1 : 6),
-            "[CHECK_MEM][%s]: Requested: %s; benchdnn_CPU_limit: %s; "
-            "CPU_RAM_capacity: %s;\n",
-            dir_c_str(), smart_bytes(total_size_cpu).c_str(),
-            smart_bytes(benchdnn_cpu_limit).c_str(),
+            "[CHECK_MEM][%s]: benchdnn_CPU_limit: %s; CPU_RAM_capacity: %s;\n",
+            dir_c_str(), smart_bytes(benchdnn_cpu_limit).c_str(),
             smart_bytes(cpu_device_capacity).c_str());
 
+    std::string sizes_str;
+    for (const auto sz : check_mem_size_args.sizes) {
+        const bool is_scratchpad = sz == check_mem_size_args.scratchpad_size;
+        sizes_str += smart_bytes(sz) + (is_scratchpad ? " (Scratchpad)" : "")
+                + ", ";
+    }
+    BENCHDNN_PRINT(6, "[CHECK_MEM][%s]: Sizes: {%s};\n", dir_c_str(),
+            sizes_str.c_str());
+
+    std::string total_size_device_str = is_cpu()
+            ? smart_bytes(check_mem_size_args.total_size_device) + " (Lib), "
+            : "";
+    BENCHDNN_PRINT((!fits_cpu_ram ? 1 : 6),
+            "[CHECK_MEM][%s]: Requested: %s%s (Service), %s (combined);\n",
+            dir_c_str(), total_size_device_str.c_str(),
+            smart_bytes(check_mem_size_args.total_size_cpu).c_str(),
+            smart_bytes(cpu_and_device_size).c_str());
+
     return res->state == FAILED ? FAIL : OK;
 }
 

From 886afaf2026e3d67051db8dc17cb6e05e8242cf7 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:17:49 -0800
Subject: [PATCH 07/18] benchdnn: mem_check: simplify scratchpad size retrieval

---
 tests/benchdnn/dnnl_common.cpp | 84 ++++++++++++++++------------------
 tests/benchdnn/utils/res.hpp   |  1 -
 2 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 31192efe02b..7e6855a0db7 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1257,36 +1257,32 @@ void add_md_size(const_dnnl_memory_desc_t md,
     // Mapped memory for GPU backend on CPU.
     check_mem_size_args.total_size_cpu += mapped_mem_factor * mem_size;
 
-    if (check_mem_size_args.is_scratchpad) {
-        check_mem_size_args.scratchpad_size += mem_size;
-    } else {
-        const bool is_corr = has_bench_mode_bit(mode_bit_t::corr);
-        const bool is_bitwise = has_bench_mode_bit(mode_bit_t::bitwise);
-        // Reference memories are always tag::abx fp32, hence need re-creating
-        // memory descriptor and take its size.
-        auto ref_md = dnn_mem_t::init_md(
-                query_md_ndims(md), query_md_dims(md), dnnl_f32, tag::abx);
-        const auto ref_md_size = dnnl_memory_desc_get_size(ref_md);
-
-        const size_t ref_mem_idx = check_mem_size_args.want_input ? 0 : 1;
-        check_mem_size_args.total_ref_md_size[ref_mem_idx] = ref_md_size;
-
-        // A memory copy for ref_compute, happens only in correctness.
-        check_mem_size_args.total_size_cpu += is_corr * ref_md_size;
-
-        // Comparison function allocates an additional tag::abx f32 memory.
-        // This allocation holds for correctness and bitwise modes.
-        const bool compare_mem_factor
-                = !check_mem_size_args.want_input && (is_corr || is_bitwise);
-        check_mem_size_args.total_size_cpu += compare_mem_factor * ref_md_size;
-
-        // Bitwise comparison allocates an additional tag::abx f32 memory from
-        // the first run to compare results against it.
-        const bool bitwise_compare_mem_factor
-                = !check_mem_size_args.want_input && is_bitwise;
-        check_mem_size_args.total_size_cpu
-                += bitwise_compare_mem_factor * ref_md_size;
-    }
+    const bool is_corr = has_bench_mode_bit(mode_bit_t::corr);
+    const bool is_bitwise = has_bench_mode_bit(mode_bit_t::bitwise);
+    // Reference memories are always tag::abx fp32, hence need re-creating
+    // memory descriptor and take its size.
+    auto ref_md = dnn_mem_t::init_md(
+            query_md_ndims(md), query_md_dims(md), dnnl_f32, tag::abx);
+    const auto ref_md_size = dnnl_memory_desc_get_size(ref_md);
+
+    const size_t ref_mem_idx = check_mem_size_args.want_input ? 0 : 1;
+    check_mem_size_args.total_ref_md_size[ref_mem_idx] = ref_md_size;
+
+    // A memory copy for ref_compute, happens only in correctness.
+    check_mem_size_args.total_size_cpu += is_corr * ref_md_size;
+
+    // Comparison function allocates an additional tag::abx f32 memory.
+    // This allocation holds for correctness and bitwise modes.
+    const bool compare_mem_factor
+            = !check_mem_size_args.want_input && (is_corr || is_bitwise);
+    check_mem_size_args.total_size_cpu += compare_mem_factor * ref_md_size;
+
+    // Bitwise comparison allocates an additional tag::abx f32 memory from
+    // the first run to compare results against it.
+    const bool bitwise_compare_mem_factor
+            = !check_mem_size_args.want_input && is_bitwise;
+    check_mem_size_args.total_size_cpu
+            += bitwise_compare_mem_factor * ref_md_size;
 }
 
 bool is_fwd_training(dnnl_prop_kind_t prop_kind) {
@@ -1375,20 +1371,20 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     get_memory_bytes(check_mem_size_args);
 
     // Get scratchpad size.
-    // Since scratchpad modes are mutually excluded, it takes sizes of both
-    // modes as either of them will report 0 size depending on the mode.
-    const auto library_scratchpad_size = query_mem_consumption(const_pd);
-    if (library_scratchpad_size > 0) {
-        // Update same fields as `add_md_size` would. See details there.
-        check_mem_size_args.sizes.push_back(library_scratchpad_size);
-        check_mem_size_args.total_size_device += library_scratchpad_size;
-        check_mem_size_args.scratchpad_size += library_scratchpad_size;
-    } else {
-        check_mem_size_args.is_scratchpad = true;
-        const auto &scratchpad_md = query_md(const_pd, DNNL_ARG_SCRATCHPAD);
-        add_md_size(scratchpad_md, check_mem_size_args);
-        check_mem_size_args.is_scratchpad = false;
-    }
+    // Since scratchpad modes are mutually exclusive, query the sizes of both
+    // modes, as one of them will report 0 size depending on the mode, and take
+    // the larger of the two.
+    const size_t library_scratchpad_size
+            = static_cast<size_t>(query_mem_consumption(const_pd));
+    const auto &scratchpad_md = query_md(const_pd, DNNL_ARG_SCRATCHPAD);
+    const size_t user_scratchpad_size
+            = dnnl_memory_desc_get_size(scratchpad_md);
+    const size_t scratchpad_size
+            = MAX2(library_scratchpad_size, user_scratchpad_size);
+    // Update same fields as `add_md_size` would. See details there.
+    check_mem_size_args.sizes.push_back(scratchpad_size);
+    check_mem_size_args.total_size_device += scratchpad_size;
+    check_mem_size_args.scratchpad_size = scratchpad_size;
 
     // Get output sizes.
     check_mem_size_args.want_input = false;
diff --git a/tests/benchdnn/utils/res.hpp b/tests/benchdnn/utils/res.hpp
index a4ea7e8b6a7..5dadef0f10c 100644
--- a/tests/benchdnn/utils/res.hpp
+++ b/tests/benchdnn/utils/res.hpp
@@ -33,7 +33,6 @@ struct check_mem_size_args_t {
     // Input args.
     const_dnnl_primitive_desc_t pd = nullptr;
     bool want_input = false;
-    bool is_scratchpad = false;
 
     // Output args:
     // `sizes` used to validate OpenCL memory requirements.

From 70a544b7d55bdf34f62e289a0eeb2a33b1b04201 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 29 Jan 2025 18:39:50 -0800
Subject: [PATCH 08/18] benchdnn: mem_check: move dir inside mem_size_args

---
 tests/benchdnn/dnnl_common.cpp | 14 ++++---
 tests/benchdnn/utils/res.hpp   | 67 +++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 7e6855a0db7..0b1f2ed803b 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1121,7 +1121,8 @@ static int check_total_size(res_t *res) {
     assert(benchdnn_device_limit > 0 && benchdnn_cpu_limit > 0);
 
     auto dir_c_str = [&res]() {
-        return (res->mem_check_dir & FLAG_FWD) ? "FWD" : "BWD";
+        assert(res->mem_size_args.dir != DIR_UNDEF);
+        return (res->mem_size_args.dir & FLAG_FWD) ? "FWD" : "BWD";
     };
 
     const check_mem_size_args_t &check_mem_size_args = res->mem_size_args;
@@ -1363,11 +1364,12 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     // validate the primitive cache. At the same time it allows to verify both
     // test objects when a double-run driver executes the fwd-for-bwd object
     // first and the bwd object after.
-    if (need_skip && res->mem_check_dir == dir) return OK;
-    res->mem_check_dir = dir;
+    // ANCHOR: MEM_CHECK_ARGS_DIR;
+    if (need_skip && res->mem_size_args.dir == dir) return OK;
 
     // Get input sizes.
-    check_mem_size_args_t check_mem_size_args(const_pd, /* input = */ true);
+    check_mem_size_args_t check_mem_size_args(
+            const_pd, /* input = */ true, dir);
     get_memory_bytes(check_mem_size_args);
 
     // Get scratchpad size.
@@ -1399,10 +1401,10 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
 
 int get_memory_footprint(const_dnnl_primitive_desc_t const_pd, res_t *res) {
     check_mem_size_args_t check_mem_in_size_args(
-            const_pd, /* want_input = */ true);
+            const_pd, /* want_input = */ true, DIR_UNDEF);
     get_memory_bytes(check_mem_in_size_args); // Get input bytes.
     check_mem_size_args_t check_mem_out_size_args(
-            const_pd, /* want_input = */ false);
+            const_pd, /* want_input = */ false, DIR_UNDEF);
     get_memory_bytes(check_mem_out_size_args); // Get output bytes.
 
     // Sum post-ops include dst bytes as an input. Not included in get_memory_bytes
diff --git a/tests/benchdnn/utils/res.hpp b/tests/benchdnn/utils/res.hpp
index 5dadef0f10c..fa728a4958b 100644
--- a/tests/benchdnn/utils/res.hpp
+++ b/tests/benchdnn/utils/res.hpp
@@ -24,37 +24,6 @@
 #include <string>
 #include <vector>
 
-struct check_mem_size_args_t {
-
-    check_mem_size_args_t() = default;
-    check_mem_size_args_t(const_dnnl_primitive_desc_t pd, bool want_input)
-        : pd(pd), want_input(want_input) {}
-
-    // Input args.
-    const_dnnl_primitive_desc_t pd = nullptr;
-    bool want_input = false;
-
-    // Output args:
-    // `sizes` used to validate OpenCL memory requirements.
-    std::vector<size_t> sizes;
-    // `total_size_device` specifies memory allocated on device for a test obj.
-    size_t total_size_device = 0;
-    // `total_size_cpu` specifies:
-    // * Memory allocated for reference ocmputations (`C` mode only).
-    // * Memory allocated for comparison results (`C` mode only).
-    // * Memory allocated for mapping device memory (GPU backend only).
-    // * Memory allocated on CPU for a test obj (CPU backend only).
-    size_t total_size_cpu = 0;
-    // `total_ref_md_size` specifies the additional tag::abx f32 memory
-    // required for correctness check.
-    // * The first element refers to the total memory for input reference
-    // * The second element refers to the total memory for output reference
-    // The args are used in memory estimation for graph driver only.
-    size_t total_ref_md_size[2] = {0, 0};
-    // `scratchpad_size` specifies a scratchpad size for specific checks.
-    size_t scratchpad_size = 0;
-};
-
 /* result structure */
 enum res_state_t {
     UNTESTED = 0,
@@ -86,6 +55,38 @@ enum dir_t {
     BWD_WB = FLAG_BWD + FLAG_WEI + FLAG_BIA,
 };
 
+struct check_mem_size_args_t {
+    check_mem_size_args_t() = default;
+    check_mem_size_args_t(
+            const_dnnl_primitive_desc_t pd, bool want_input, dir_t dir)
+        : pd(pd), want_input(want_input), dir(dir) {}
+
+    // Input args.
+    const_dnnl_primitive_desc_t pd = nullptr;
+    bool want_input = false;
+    dir_t dir = DIR_UNDEF; // See ANCHOR: MEM_CHECK_ARGS_DIR;
+
+    // Output args:
+    // `sizes` used to validate OpenCL memory requirements.
+    std::vector<size_t> sizes;
+    // `total_size_device` specifies memory allocated on device for a test obj.
+    size_t total_size_device = 0;
+    // `total_size_cpu` specifies:
+    // * Memory allocated for reference computations (`C` mode only).
+    // * Memory allocated for comparison results (`C` mode only).
+    // * Memory allocated for mapping device memory (GPU backend only).
+    // * Memory allocated on CPU for a test obj (CPU backend only).
+    size_t total_size_cpu = 0;
+    // `total_ref_md_size` specifies the additional tag::abx f32 memory
+    // required for correctness check.
+    // * The first element refers to the total memory for input reference
+    // * The second element refers to the total memory for output reference
+    // The args are used in memory estimation for graph driver only.
+    size_t total_ref_md_size[2] = {0, 0};
+    // `scratchpad_size` specifies a scratchpad size for specific checks.
+    size_t scratchpad_size = 0;
+};
+
 struct res_t {
     res_state_t state;
     size_t errors, total;
@@ -93,10 +94,8 @@ struct res_t {
     std::string impl_name;
     std::string prim_ref_repro;
     std::string reason;
+    // TODO: fuse `ibytes` and `obytes` into `mem_size_args`.
     size_t ibytes, obytes;
-
-    // TODO: merge mem_check_dir into check_mem_size_args_t
-    dir_t mem_check_dir = DIR_UNDEF;
     check_mem_size_args_t mem_size_args;
 };
 

From 235bfc12b7d670d63dbdd3d8f2adabdf4013b7e8 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 12 Feb 2025 09:59:23 -0800
Subject: [PATCH 09/18] benchdnn: styling: consolidate type declarations under
 a macro

---
 tests/benchdnn/binary/bench_binary.cpp         | 14 ++------------
 tests/benchdnn/bnorm/bench_bnorm.cpp           | 14 ++------------
 tests/benchdnn/concat/bench_concat.cpp         | 14 ++------------
 tests/benchdnn/conv/bench_conv.cpp             | 12 +-----------
 tests/benchdnn/deconv/bench_deconv.cpp         | 12 +-----------
 tests/benchdnn/eltwise/bench_eltwise.cpp       | 14 ++------------
 tests/benchdnn/gnorm/bench_gnorm.cpp           | 14 ++------------
 tests/benchdnn/ip/bench_ip.cpp                 | 12 +-----------
 tests/benchdnn/lnorm/bench_lnorm.cpp           | 14 ++------------
 tests/benchdnn/lrn/bench_lrn.cpp               | 14 ++------------
 tests/benchdnn/matmul/bench_matmul.cpp         | 12 +-----------
 tests/benchdnn/pool/bench_pool.cpp             | 14 ++------------
 tests/benchdnn/prelu/bench_prelu.cpp           | 14 ++------------
 tests/benchdnn/reduction/bench_reduction.cpp   | 14 ++------------
 tests/benchdnn/reorder/bench_reorder.cpp       | 14 ++------------
 tests/benchdnn/resampling/bench_resampling.cpp | 14 ++------------
 tests/benchdnn/rnn/bench_rnn.cpp               | 14 ++------------
 tests/benchdnn/rnn/rnn_task_executor.hpp       | 15 ++++++++++++++-
 tests/benchdnn/shuffle/bench_shuffle.cpp       | 14 ++------------
 tests/benchdnn/softmax/bench_softmax.cpp       | 14 ++------------
 tests/benchdnn/sum/bench_sum.cpp               | 14 ++------------
 tests/benchdnn/utils/task_executor.hpp         | 17 ++++++++++++++++-
 22 files changed, 66 insertions(+), 238 deletions(-)

diff --git a/tests/benchdnn/binary/bench_binary.cpp b/tests/benchdnn/binary/bench_binary.cpp
index 77e9a319477..ca318788df9 100644
--- a/tests/benchdnn/binary/bench_binary.cpp
+++ b/tests/benchdnn/binary/bench_binary.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@
 
 namespace binary {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/bnorm/bench_bnorm.cpp b/tests/benchdnn/bnorm/bench_bnorm.cpp
index 68e791bc5d4..dcce5ec342d 100644
--- a/tests/benchdnn/bnorm/bench_bnorm.cpp
+++ b/tests/benchdnn/bnorm/bench_bnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,7 @@
 
 namespace bnorm {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/concat/bench_concat.cpp b/tests/benchdnn/concat/bench_concat.cpp
index a00e780bee1..32ddf58ff44 100644
--- a/tests/benchdnn/concat/bench_concat.cpp
+++ b/tests/benchdnn/concat/bench_concat.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@
 
 namespace concat {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/conv/bench_conv.cpp b/tests/benchdnn/conv/bench_conv.cpp
index 993ad5a88de..76a2b7a7b93 100644
--- a/tests/benchdnn/conv/bench_conv.cpp
+++ b/tests/benchdnn/conv/bench_conv.cpp
@@ -27,17 +27,7 @@
 
 namespace conv {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/deconv/bench_deconv.cpp b/tests/benchdnn/deconv/bench_deconv.cpp
index 3a1b2193342..2cd2cdcadd6 100644
--- a/tests/benchdnn/deconv/bench_deconv.cpp
+++ b/tests/benchdnn/deconv/bench_deconv.cpp
@@ -26,17 +26,7 @@
 
 namespace deconv {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/eltwise/bench_eltwise.cpp b/tests/benchdnn/eltwise/bench_eltwise.cpp
index 8bc44a9aa81..c6ae77c5a2e 100644
--- a/tests/benchdnn/eltwise/bench_eltwise.cpp
+++ b/tests/benchdnn/eltwise/bench_eltwise.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -27,17 +27,7 @@
 
 namespace eltwise {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/gnorm/bench_gnorm.cpp b/tests/benchdnn/gnorm/bench_gnorm.cpp
index 7e830505e6c..08a24b5f3cf 100644
--- a/tests/benchdnn/gnorm/bench_gnorm.cpp
+++ b/tests/benchdnn/gnorm/bench_gnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@ using namespace bnorm;
 
 namespace gnorm {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/ip/bench_ip.cpp b/tests/benchdnn/ip/bench_ip.cpp
index f13a7174694..55ebd04b78d 100644
--- a/tests/benchdnn/ip/bench_ip.cpp
+++ b/tests/benchdnn/ip/bench_ip.cpp
@@ -25,17 +25,7 @@
 
 namespace ip {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/lnorm/bench_lnorm.cpp b/tests/benchdnn/lnorm/bench_lnorm.cpp
index cd499ade942..6a60d7ecca5 100644
--- a/tests/benchdnn/lnorm/bench_lnorm.cpp
+++ b/tests/benchdnn/lnorm/bench_lnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -29,17 +29,7 @@ using namespace bnorm;
 
 namespace lnorm {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/lrn/bench_lrn.cpp b/tests/benchdnn/lrn/bench_lrn.cpp
index 14f2c498ac5..3cb4de49af4 100644
--- a/tests/benchdnn/lrn/bench_lrn.cpp
+++ b/tests/benchdnn/lrn/bench_lrn.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,7 @@
 
 namespace lrn {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/matmul/bench_matmul.cpp b/tests/benchdnn/matmul/bench_matmul.cpp
index 6d1d822fe34..6ae452bcb1e 100644
--- a/tests/benchdnn/matmul/bench_matmul.cpp
+++ b/tests/benchdnn/matmul/bench_matmul.cpp
@@ -25,17 +25,7 @@
 
 namespace matmul {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/pool/bench_pool.cpp b/tests/benchdnn/pool/bench_pool.cpp
index f61597818ab..f2a2f390923 100644
--- a/tests/benchdnn/pool/bench_pool.cpp
+++ b/tests/benchdnn/pool/bench_pool.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,7 @@
 
 namespace pool {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/prelu/bench_prelu.cpp b/tests/benchdnn/prelu/bench_prelu.cpp
index 19956fa0570..e79ce4d65cb 100644
--- a/tests/benchdnn/prelu/bench_prelu.cpp
+++ b/tests/benchdnn/prelu/bench_prelu.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@
 
 namespace prelu {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/reduction/bench_reduction.cpp b/tests/benchdnn/reduction/bench_reduction.cpp
index 5faadf0a24e..0e031cb2474 100644
--- a/tests/benchdnn/reduction/bench_reduction.cpp
+++ b/tests/benchdnn/reduction/bench_reduction.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -21,17 +21,7 @@
 
 namespace reduction {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/reorder/bench_reorder.cpp b/tests/benchdnn/reorder/bench_reorder.cpp
index f1a7a59c575..5badbc08ed6 100644
--- a/tests/benchdnn/reorder/bench_reorder.cpp
+++ b/tests/benchdnn/reorder/bench_reorder.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -24,17 +24,7 @@
 
 namespace reorder {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/resampling/bench_resampling.cpp b/tests/benchdnn/resampling/bench_resampling.cpp
index 0c67f41c609..d9617bbc94a 100644
--- a/tests/benchdnn/resampling/bench_resampling.cpp
+++ b/tests/benchdnn/resampling/bench_resampling.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,7 @@
 
 namespace resampling {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/rnn/bench_rnn.cpp b/tests/benchdnn/rnn/bench_rnn.cpp
index 21e4251372e..25a02b6e71c 100644
--- a/tests/benchdnn/rnn/bench_rnn.cpp
+++ b/tests/benchdnn/rnn/bench_rnn.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -33,17 +33,7 @@
 
 namespace rnn {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t &,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t &, res_t *)>;
-using driver_task_executor_t = rnn_task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/rnn/rnn_task_executor.hpp b/tests/benchdnn/rnn/rnn_task_executor.hpp
index d29130abda8..5ccb8a78e2f 100644
--- a/tests/benchdnn/rnn/rnn_task_executor.hpp
+++ b/tests/benchdnn/rnn/rnn_task_executor.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,6 +20,19 @@
 #include "rnn/rnn_task.hpp"
 #include "utils/parallel.hpp"
 
+#define TASK_EXECUTOR_DECL_TYPES \
+    using create_func_t = std::function<int( \
+            std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t &, res_t *)>; \
+    using check_cache_func_t = std::function<int( \
+            std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t *, res_t *)>; \
+    using do_func_t = std::function<int( \
+            const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t &, res_t *)>; \
+    using driver_task_executor_t = rnn_task_executor_t<prb_t, perf_report_t, \
+            create_func_t, check_cache_func_t, do_func_t>;
+
 extern int repeats_per_prb;
 
 template <typename prb_t, typename perf_report_t, typename create_func_t,
diff --git a/tests/benchdnn/shuffle/bench_shuffle.cpp b/tests/benchdnn/shuffle/bench_shuffle.cpp
index a4d68e51096..10ba70911f9 100644
--- a/tests/benchdnn/shuffle/bench_shuffle.cpp
+++ b/tests/benchdnn/shuffle/bench_shuffle.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,7 @@
 
 namespace shuffle {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/softmax/bench_softmax.cpp b/tests/benchdnn/softmax/bench_softmax.cpp
index 8ea9de9ff11..06bc5860173 100644
--- a/tests/benchdnn/softmax/bench_softmax.cpp
+++ b/tests/benchdnn/softmax/bench_softmax.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@
 
 namespace softmax {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/sum/bench_sum.cpp b/tests/benchdnn/sum/bench_sum.cpp
index 0c2591c203c..40287851f12 100644
--- a/tests/benchdnn/sum/bench_sum.cpp
+++ b/tests/benchdnn/sum/bench_sum.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,17 +25,7 @@
 
 namespace sum {
 
-using create_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using check_cache_func_t = std::function<int(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, const prb_t *,
-        res_t *)>;
-using do_func_t = std::function<int(
-        const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &,
-        const prb_t *, res_t *)>;
-using driver_task_executor_t = task_executor_t<prb_t, perf_report_t,
-        create_func_t, check_cache_func_t, do_func_t>;
+TASK_EXECUTOR_DECL_TYPES;
 
 void check_correctness(
         const settings_t &s, driver_task_executor_t &task_executor) {
diff --git a/tests/benchdnn/utils/task_executor.hpp b/tests/benchdnn/utils/task_executor.hpp
index 5fe00dc300c..fdd9ac3a84a 100644
--- a/tests/benchdnn/utils/task_executor.hpp
+++ b/tests/benchdnn/utils/task_executor.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,6 +20,21 @@
 #include "utils/parallel.hpp"
 #include "utils/task.hpp"
 
+// Declares the per-driver callback aliases and `driver_task_executor_t`.
+// It must be a macro because the `prb_t` type is unique to each driver.
+#define TASK_EXECUTOR_DECL_TYPES \
+    using create_func_t = std::function<int( \
+            std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t *, res_t *)>; \
+    using check_cache_func_t = std::function<int( \
+            std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t *, res_t *)>; \
+    using do_func_t = std::function<int( \
+            const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
+            const prb_t *, res_t *)>; \
+    using driver_task_executor_t = task_executor_t<prb_t, perf_report_t, \
+            create_func_t, check_cache_func_t, do_func_t>;
+
 extern int repeats_per_prb;
 
 template <typename prb_t, typename perf_report_t, typename create_func_t,

From 8a41f6f35f6375913426d1072435a576750f39be Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 12 Feb 2025 10:07:23 -0800
Subject: [PATCH 10/18] benchdnn: styling: rename check_cache_func into
 check_func

---
 tests/benchdnn/binary/bench_binary.cpp        |  3 +-
 tests/benchdnn/binary/binary.cpp              |  3 +-
 tests/benchdnn/binary/binary.hpp              |  5 ++--
 tests/benchdnn/bnorm/bench_bnorm.cpp          |  3 +-
 tests/benchdnn/bnorm/bnorm.cpp                |  5 ++--
 tests/benchdnn/bnorm/bnorm.hpp                |  5 ++--
 tests/benchdnn/concat/bench_concat.cpp        |  3 +-
 tests/benchdnn/concat/concat.cpp              |  5 ++--
 tests/benchdnn/concat/concat.hpp              |  5 ++--
 tests/benchdnn/conv/bench_conv.cpp            |  8 ++---
 tests/benchdnn/conv/conv.cpp                  |  3 +-
 tests/benchdnn/conv/conv.hpp                  |  3 +-
 tests/benchdnn/conv/conv_dw_fusion.cpp        |  3 +-
 tests/benchdnn/conv/conv_dw_fusion.hpp        |  5 ++--
 tests/benchdnn/deconv/bench_deconv.cpp        |  3 +-
 tests/benchdnn/deconv/deconv.cpp              |  3 +-
 tests/benchdnn/deconv/deconv.hpp              |  3 +-
 tests/benchdnn/eltwise/bench_eltwise.cpp      |  3 +-
 tests/benchdnn/eltwise/eltwise.cpp            |  5 ++--
 tests/benchdnn/eltwise/eltwise.hpp            |  5 ++--
 tests/benchdnn/gnorm/bench_gnorm.cpp          |  3 +-
 tests/benchdnn/gnorm/gnorm.cpp                |  5 ++--
 tests/benchdnn/gnorm/gnorm.hpp                |  5 ++--
 tests/benchdnn/ip/bench_ip.cpp                |  3 +-
 tests/benchdnn/ip/ip.cpp                      |  3 +-
 tests/benchdnn/ip/ip.hpp                      |  3 +-
 tests/benchdnn/lnorm/bench_lnorm.cpp          |  3 +-
 tests/benchdnn/lnorm/lnorm.cpp                |  5 ++--
 tests/benchdnn/lnorm/lnorm.hpp                |  5 ++--
 tests/benchdnn/lrn/bench_lrn.cpp              |  3 +-
 tests/benchdnn/lrn/lrn.cpp                    |  5 ++--
 tests/benchdnn/lrn/lrn.hpp                    |  5 ++--
 tests/benchdnn/matmul/bench_matmul.cpp        |  3 +-
 tests/benchdnn/matmul/matmul.cpp              |  3 +-
 tests/benchdnn/matmul/matmul.hpp              |  5 ++--
 tests/benchdnn/pool/bench_pool.cpp            |  3 +-
 tests/benchdnn/pool/pool.cpp                  |  5 ++--
 tests/benchdnn/pool/pool.hpp                  |  5 ++--
 tests/benchdnn/prelu/bench_prelu.cpp          |  3 +-
 tests/benchdnn/prelu/prelu.cpp                |  5 ++--
 tests/benchdnn/prelu/prelu.hpp                |  5 ++--
 tests/benchdnn/reduction/bench_reduction.cpp  |  3 +-
 tests/benchdnn/reduction/reduction.cpp        |  5 ++--
 tests/benchdnn/reduction/reduction.hpp        |  5 ++--
 tests/benchdnn/reorder/bench_reorder.cpp      |  3 +-
 tests/benchdnn/reorder/reorder.cpp            |  5 ++--
 tests/benchdnn/reorder/reorder.hpp            |  5 ++--
 .../benchdnn/resampling/bench_resampling.cpp  |  3 +-
 tests/benchdnn/resampling/resampling.cpp      |  5 ++--
 tests/benchdnn/resampling/resampling.hpp      |  5 ++--
 tests/benchdnn/rnn/bench_rnn.cpp              |  2 +-
 tests/benchdnn/rnn/rnn.cpp                    |  3 +-
 tests/benchdnn/rnn/rnn.hpp                    |  3 +-
 tests/benchdnn/shuffle/bench_shuffle.cpp      |  3 +-
 tests/benchdnn/shuffle/shuffle.cpp            |  5 ++--
 tests/benchdnn/shuffle/shuffle.hpp            |  5 ++--
 tests/benchdnn/softmax/bench_softmax.cpp      |  3 +-
 tests/benchdnn/softmax/softmax.cpp            |  3 +-
 tests/benchdnn/softmax/softmax.hpp            |  5 ++--
 tests/benchdnn/sum/bench_sum.cpp              |  3 +-
 tests/benchdnn/sum/sum.cpp                    |  5 ++--
 tests/benchdnn/sum/sum.hpp                    |  5 ++--
 tests/benchdnn/utils/task.hpp                 | 30 +++++++++++--------
 tests/benchdnn/utils/task_executor.hpp        | 17 +++++------
 64 files changed, 121 insertions(+), 176 deletions(-)

diff --git a/tests/benchdnn/binary/bench_binary.cpp b/tests/benchdnn/binary/bench_binary.cpp
index ca318788df9..af3e8f1d412 100644
--- a/tests/benchdnn/binary/bench_binary.cpp
+++ b/tests/benchdnn/binary/bench_binary.cpp
@@ -42,8 +42,7 @@ void check_correctness(
                 i_inplace, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/binary/binary.cpp b/tests/benchdnn/binary/binary.cpp
index 8f1ccd3148f..d559e91cfca 100644
--- a/tests/benchdnn/binary/binary.cpp
+++ b/tests/benchdnn/binary/binary.cpp
@@ -295,8 +295,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/binary/binary.hpp b/tests/benchdnn/binary/binary.hpp
index b4c1264d21d..428453f2077 100644
--- a/tests/benchdnn/binary/binary.hpp
+++ b/tests/benchdnn/binary/binary.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -167,8 +167,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/bnorm/bench_bnorm.cpp b/tests/benchdnn/bnorm/bench_bnorm.cpp
index dcce5ec342d..ee731fa1cc3 100644
--- a/tests/benchdnn/bnorm/bench_bnorm.cpp
+++ b/tests/benchdnn/bnorm/bench_bnorm.cpp
@@ -45,8 +45,7 @@ void check_correctness(
                 i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/bnorm/bnorm.cpp b/tests/benchdnn/bnorm/bnorm.cpp
index 241fa1e4dfe..32996c0453f 100644
--- a/tests/benchdnn/bnorm/bnorm.cpp
+++ b/tests/benchdnn/bnorm/bnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -718,8 +718,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/bnorm/bnorm.hpp b/tests/benchdnn/bnorm/bnorm.hpp
index d3669af9686..922eb6a99b7 100644
--- a/tests/benchdnn/bnorm/bnorm.hpp
+++ b/tests/benchdnn/bnorm/bnorm.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -278,8 +278,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/concat/bench_concat.cpp b/tests/benchdnn/concat/bench_concat.cpp
index 32ddf58ff44..bec5257d15e 100644
--- a/tests/benchdnn/concat/bench_concat.cpp
+++ b/tests/benchdnn/concat/bench_concat.cpp
@@ -41,8 +41,7 @@ void check_correctness(
                 i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/concat/concat.cpp b/tests/benchdnn/concat/concat.cpp
index 323a1506e71..0e3a8ecdc82 100644
--- a/tests/benchdnn/concat/concat.cpp
+++ b/tests/benchdnn/concat/concat.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -196,8 +196,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     // Assume it doesn't change through the execution.
     static int capacity = 0;
diff --git a/tests/benchdnn/concat/concat.hpp b/tests/benchdnn/concat/concat.hpp
index 19aba711c2b..25e1ca56e72 100644
--- a/tests/benchdnn/concat/concat.hpp
+++ b/tests/benchdnn/concat/concat.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -176,8 +176,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/conv/bench_conv.cpp b/tests/benchdnn/conv/bench_conv.cpp
index 76a2b7a7b93..e2f42757962 100644
--- a/tests/benchdnn/conv/bench_conv.cpp
+++ b/tests/benchdnn/conv/bench_conv.cpp
@@ -66,11 +66,11 @@ void check_correctness(
         bool has_dw_po = i_attr.post_ops.convolution_index() >= 0;
         auto &conv_createit
                 = has_dw_po ? conv_dw_fusion::createit : conv::createit;
-        auto &conv_check_cacheit = has_dw_po ? conv_dw_fusion::check_cacheit
-                                             : conv::check_cacheit;
+        auto &conv_checkit
+                = has_dw_po ? conv_dw_fusion::checkit : conv::checkit;
         auto &conv_doit = has_dw_po ? conv_dw_fusion::doit : conv::doit;
-        task_executor.submit(prb, s.perf_template, conv_createit,
-                conv_check_cacheit, conv_doit);
+        task_executor.submit(
+                prb, s.perf_template, conv_createit, conv_checkit, conv_doit);
     }
 }
 
diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp
index b8e725b6eb3..33f22398469 100644
--- a/tests/benchdnn/conv/conv.cpp
+++ b/tests/benchdnn/conv/conv.cpp
@@ -577,8 +577,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/conv/conv.hpp b/tests/benchdnn/conv/conv.hpp
index 5bb1098fd6b..8001865281e 100644
--- a/tests/benchdnn/conv/conv.hpp
+++ b/tests/benchdnn/conv/conv.hpp
@@ -312,8 +312,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/conv/conv_dw_fusion.cpp b/tests/benchdnn/conv/conv_dw_fusion.cpp
index 6ba4ea5c37b..2ad9959760b 100644
--- a/tests/benchdnn/conv/conv_dw_fusion.cpp
+++ b/tests/benchdnn/conv/conv_dw_fusion.cpp
@@ -306,8 +306,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
 
diff --git a/tests/benchdnn/conv/conv_dw_fusion.hpp b/tests/benchdnn/conv/conv_dw_fusion.hpp
index e0f6832f218..f91db2afd3a 100644
--- a/tests/benchdnn/conv/conv_dw_fusion.hpp
+++ b/tests/benchdnn/conv/conv_dw_fusion.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2023 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -36,8 +36,7 @@ using cfg_t = conv::cfg_t;
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/deconv/bench_deconv.cpp b/tests/benchdnn/deconv/bench_deconv.cpp
index 2cd2cdcadd6..fb77adcfc70 100644
--- a/tests/benchdnn/deconv/bench_deconv.cpp
+++ b/tests/benchdnn/deconv/bench_deconv.cpp
@@ -60,8 +60,7 @@ void check_correctness(
                 i_alg, i_mb, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp
index ac7c08fdb2d..8e76d1ff6b6 100644
--- a/tests/benchdnn/deconv/deconv.cpp
+++ b/tests/benchdnn/deconv/deconv.cpp
@@ -554,8 +554,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/deconv/deconv.hpp b/tests/benchdnn/deconv/deconv.hpp
index ad2a109c032..be4ed25b1e7 100644
--- a/tests/benchdnn/deconv/deconv.hpp
+++ b/tests/benchdnn/deconv/deconv.hpp
@@ -310,8 +310,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/eltwise/bench_eltwise.cpp b/tests/benchdnn/eltwise/bench_eltwise.cpp
index c6ae77c5a2e..d3e03940787 100644
--- a/tests/benchdnn/eltwise/bench_eltwise.cpp
+++ b/tests/benchdnn/eltwise/bench_eltwise.cpp
@@ -46,8 +46,7 @@ void check_correctness(
                 i_mb, i_inplace, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/eltwise/eltwise.cpp b/tests/benchdnn/eltwise/eltwise.cpp
index 34d8421009b..fdc8edd83c0 100644
--- a/tests/benchdnn/eltwise/eltwise.cpp
+++ b/tests/benchdnn/eltwise/eltwise.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -498,8 +498,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/eltwise/eltwise.hpp b/tests/benchdnn/eltwise/eltwise.hpp
index f94c7085012..35166cbb353 100644
--- a/tests/benchdnn/eltwise/eltwise.hpp
+++ b/tests/benchdnn/eltwise/eltwise.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -172,8 +172,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/gnorm/bench_gnorm.cpp b/tests/benchdnn/gnorm/bench_gnorm.cpp
index 08a24b5f3cf..212d92d02fd 100644
--- a/tests/benchdnn/gnorm/bench_gnorm.cpp
+++ b/tests/benchdnn/gnorm/bench_gnorm.cpp
@@ -42,8 +42,7 @@ void check_correctness(
                 i_inplace, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/gnorm/gnorm.cpp b/tests/benchdnn/gnorm/gnorm.cpp
index d43af08daf2..9b86b4f1a5c 100644
--- a/tests/benchdnn/gnorm/gnorm.cpp
+++ b/tests/benchdnn/gnorm/gnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -703,8 +703,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     return OK;
diff --git a/tests/benchdnn/gnorm/gnorm.hpp b/tests/benchdnn/gnorm/gnorm.hpp
index ef6660fb10d..fd1bf14d7de 100644
--- a/tests/benchdnn/gnorm/gnorm.hpp
+++ b/tests/benchdnn/gnorm/gnorm.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -280,8 +280,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/ip/bench_ip.cpp b/tests/benchdnn/ip/bench_ip.cpp
index 55ebd04b78d..a689aaee4fb 100644
--- a/tests/benchdnn/ip/bench_ip.cpp
+++ b/tests/benchdnn/ip/bench_ip.cpp
@@ -57,8 +57,7 @@ void check_correctness(
                 i_mb, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/ip/ip.cpp b/tests/benchdnn/ip/ip.cpp
index acc87fa3216..b5a2db3087b 100644
--- a/tests/benchdnn/ip/ip.cpp
+++ b/tests/benchdnn/ip/ip.cpp
@@ -408,8 +408,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/ip/ip.hpp b/tests/benchdnn/ip/ip.hpp
index db327ccdfd6..f41944f8a19 100644
--- a/tests/benchdnn/ip/ip.hpp
+++ b/tests/benchdnn/ip/ip.hpp
@@ -230,8 +230,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/lnorm/bench_lnorm.cpp b/tests/benchdnn/lnorm/bench_lnorm.cpp
index 6a60d7ecca5..60971758bc6 100644
--- a/tests/benchdnn/lnorm/bench_lnorm.cpp
+++ b/tests/benchdnn/lnorm/bench_lnorm.cpp
@@ -48,8 +48,7 @@ void check_correctness(
                 s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/lnorm/lnorm.cpp b/tests/benchdnn/lnorm/lnorm.cpp
index cb4f911e997..1d6a01997c8 100644
--- a/tests/benchdnn/lnorm/lnorm.cpp
+++ b/tests/benchdnn/lnorm/lnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 * Copyright 2024 Arm Ltd. and affiliates
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -663,8 +663,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/lnorm/lnorm.hpp b/tests/benchdnn/lnorm/lnorm.hpp
index c1140e16813..e608ae2979b 100644
--- a/tests/benchdnn/lnorm/lnorm.hpp
+++ b/tests/benchdnn/lnorm/lnorm.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -265,8 +265,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/lrn/bench_lrn.cpp b/tests/benchdnn/lrn/bench_lrn.cpp
index 3cb4de49af4..d779ab27b90 100644
--- a/tests/benchdnn/lrn/bench_lrn.cpp
+++ b/tests/benchdnn/lrn/bench_lrn.cpp
@@ -42,8 +42,7 @@ void check_correctness(
                 i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/lrn/lrn.cpp b/tests/benchdnn/lrn/lrn.cpp
index 9c88530ffca..5f037b49729 100644
--- a/tests/benchdnn/lrn/lrn.cpp
+++ b/tests/benchdnn/lrn/lrn.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -207,8 +207,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/lrn/lrn.hpp b/tests/benchdnn/lrn/lrn.hpp
index 359982ca84e..eda7d20f348 100644
--- a/tests/benchdnn/lrn/lrn.hpp
+++ b/tests/benchdnn/lrn/lrn.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -194,8 +194,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/matmul/bench_matmul.cpp b/tests/benchdnn/matmul/bench_matmul.cpp
index 6ae452bcb1e..2f010ecc750 100644
--- a/tests/benchdnn/matmul/bench_matmul.cpp
+++ b/tests/benchdnn/matmul/bench_matmul.cpp
@@ -60,8 +60,7 @@ void check_correctness(
                 i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/matmul/matmul.cpp b/tests/benchdnn/matmul/matmul.cpp
index 75d5abbae25..bd7d947bf44 100644
--- a/tests/benchdnn/matmul/matmul.cpp
+++ b/tests/benchdnn/matmul/matmul.cpp
@@ -882,8 +882,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/matmul/matmul.hpp b/tests/benchdnn/matmul/matmul.hpp
index 6b222be5262..9b23aab8d19 100644
--- a/tests/benchdnn/matmul/matmul.hpp
+++ b/tests/benchdnn/matmul/matmul.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -303,8 +303,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/pool/bench_pool.cpp b/tests/benchdnn/pool/bench_pool.cpp
index f2a2f390923..2c012f9749a 100644
--- a/tests/benchdnn/pool/bench_pool.cpp
+++ b/tests/benchdnn/pool/bench_pool.cpp
@@ -42,8 +42,7 @@ void check_correctness(
                 i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/pool/pool.cpp b/tests/benchdnn/pool/pool.cpp
index 6f4f318a3a2..6ab707c498f 100644
--- a/tests/benchdnn/pool/pool.cpp
+++ b/tests/benchdnn/pool/pool.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 * Copyright 2022-2023 Arm Ltd. and affiliates
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -341,8 +341,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/pool/pool.hpp b/tests/benchdnn/pool/pool.hpp
index 47de9adc3c3..b18d68a2d21 100644
--- a/tests/benchdnn/pool/pool.hpp
+++ b/tests/benchdnn/pool/pool.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -294,8 +294,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/prelu/bench_prelu.cpp b/tests/benchdnn/prelu/bench_prelu.cpp
index e79ce4d65cb..e26e7eb819a 100644
--- a/tests/benchdnn/prelu/bench_prelu.cpp
+++ b/tests/benchdnn/prelu/bench_prelu.cpp
@@ -39,8 +39,7 @@ void check_correctness(
                 i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/prelu/prelu.cpp b/tests/benchdnn/prelu/prelu.cpp
index 1407b52a760..a53054dc9a8 100644
--- a/tests/benchdnn/prelu/prelu.cpp
+++ b/tests/benchdnn/prelu/prelu.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -236,8 +236,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/prelu/prelu.hpp b/tests/benchdnn/prelu/prelu.hpp
index 80a8f63f4f9..cc00fd197c5 100644
--- a/tests/benchdnn/prelu/prelu.hpp
+++ b/tests/benchdnn/prelu/prelu.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -153,8 +153,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/reduction/bench_reduction.cpp b/tests/benchdnn/reduction/bench_reduction.cpp
index 0e031cb2474..95bef6c7ec0 100644
--- a/tests/benchdnn/reduction/bench_reduction.cpp
+++ b/tests/benchdnn/reduction/bench_reduction.cpp
@@ -39,8 +39,7 @@ void check_correctness(
                 i_eps, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/reduction/reduction.cpp b/tests/benchdnn/reduction/reduction.cpp
index 6a13cbdfd5a..ed4e9487e41 100644
--- a/tests/benchdnn/reduction/reduction.cpp
+++ b/tests/benchdnn/reduction/reduction.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -322,8 +322,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/reduction/reduction.hpp b/tests/benchdnn/reduction/reduction.hpp
index a56329a6799..89d6690fb32 100644
--- a/tests/benchdnn/reduction/reduction.hpp
+++ b/tests/benchdnn/reduction/reduction.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -184,8 +184,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/reorder/bench_reorder.cpp b/tests/benchdnn/reorder/bench_reorder.cpp
index 5badbc08ed6..8bdeddc8263 100644
--- a/tests/benchdnn/reorder/bench_reorder.cpp
+++ b/tests/benchdnn/reorder/bench_reorder.cpp
@@ -44,8 +44,7 @@ void check_correctness(
                 i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/reorder/reorder.cpp b/tests/benchdnn/reorder/reorder.cpp
index 237e033981b..33f3c3e62be 100644
--- a/tests/benchdnn/reorder/reorder.cpp
+++ b/tests/benchdnn/reorder/reorder.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -488,8 +488,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/reorder/reorder.hpp b/tests/benchdnn/reorder/reorder.hpp
index 60c8f05d9ed..1fb66c30acf 100644
--- a/tests/benchdnn/reorder/reorder.hpp
+++ b/tests/benchdnn/reorder/reorder.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -206,8 +206,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/resampling/bench_resampling.cpp b/tests/benchdnn/resampling/bench_resampling.cpp
index d9617bbc94a..e753e7e2891 100644
--- a/tests/benchdnn/resampling/bench_resampling.cpp
+++ b/tests/benchdnn/resampling/bench_resampling.cpp
@@ -43,8 +43,7 @@ void check_correctness(
                 i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/resampling/resampling.cpp b/tests/benchdnn/resampling/resampling.cpp
index dfe6296841a..690d01569c1 100644
--- a/tests/benchdnn/resampling/resampling.cpp
+++ b/tests/benchdnn/resampling/resampling.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -249,8 +249,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/resampling/resampling.hpp b/tests/benchdnn/resampling/resampling.hpp
index 05c5ca0526c..15b0b85f0e8 100644
--- a/tests/benchdnn/resampling/resampling.hpp
+++ b/tests/benchdnn/resampling/resampling.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -208,8 +208,7 @@ int fill_dat(
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/rnn/bench_rnn.cpp b/tests/benchdnn/rnn/bench_rnn.cpp
index 25a02b6e71c..c476fc78129 100644
--- a/tests/benchdnn/rnn/bench_rnn.cpp
+++ b/tests/benchdnn/rnn/bench_rnn.cpp
@@ -64,7 +64,7 @@ void check_correctness(
                 i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
 
         task_executor.submit(
-                std::move(prb), s.perf_template, createit, check_cacheit, doit);
+                std::move(prb), s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp
index 4fff6fcfe0f..ea6fc985b8d 100644
--- a/tests/benchdnn/rnn/rnn.cpp
+++ b/tests/benchdnn/rnn/rnn.cpp
@@ -1251,8 +1251,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     SAFE(check_caches(v_prim[0], prb, res), WARN);
     if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/rnn/rnn.hpp b/tests/benchdnn/rnn/rnn.hpp
index e66f85086b5..9051bdc4373 100644
--- a/tests/benchdnn/rnn/rnn.hpp
+++ b/tests/benchdnn/rnn/rnn.hpp
@@ -592,8 +592,7 @@ void compute_ref_bwd(const prb_t &prb, const args_t &args);
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t &prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t &prb, res_t *res);
diff --git a/tests/benchdnn/shuffle/bench_shuffle.cpp b/tests/benchdnn/shuffle/bench_shuffle.cpp
index 10ba70911f9..e3a29edf2b6 100644
--- a/tests/benchdnn/shuffle/bench_shuffle.cpp
+++ b/tests/benchdnn/shuffle/bench_shuffle.cpp
@@ -42,8 +42,7 @@ void check_correctness(
                 i_ctx_init, i_ctx_exe, s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/shuffle/shuffle.cpp b/tests/benchdnn/shuffle/shuffle.cpp
index 243bcf6d271..bb24a565eb6 100644
--- a/tests/benchdnn/shuffle/shuffle.cpp
+++ b/tests/benchdnn/shuffle/shuffle.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -181,8 +181,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/shuffle/shuffle.hpp b/tests/benchdnn/shuffle/shuffle.hpp
index fd0f086f2ce..c1121268eca 100644
--- a/tests/benchdnn/shuffle/shuffle.hpp
+++ b/tests/benchdnn/shuffle/shuffle.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -161,8 +161,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/softmax/bench_softmax.cpp b/tests/benchdnn/softmax/bench_softmax.cpp
index 06bc5860173..84805432485 100644
--- a/tests/benchdnn/softmax/bench_softmax.cpp
+++ b/tests/benchdnn/softmax/bench_softmax.cpp
@@ -46,8 +46,7 @@ void check_correctness(
                 s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/softmax/softmax.cpp b/tests/benchdnn/softmax/softmax.cpp
index 25aa6c354a2..2deae687717 100644
--- a/tests/benchdnn/softmax/softmax.cpp
+++ b/tests/benchdnn/softmax/softmax.cpp
@@ -413,8 +413,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/softmax/softmax.hpp b/tests/benchdnn/softmax/softmax.hpp
index 36046e06fd0..4f4cde3f4ee 100644
--- a/tests/benchdnn/softmax/softmax.hpp
+++ b/tests/benchdnn/softmax/softmax.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -204,8 +204,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/sum/bench_sum.cpp b/tests/benchdnn/sum/bench_sum.cpp
index 40287851f12..4cd03ae08d2 100644
--- a/tests/benchdnn/sum/bench_sum.cpp
+++ b/tests/benchdnn/sum/bench_sum.cpp
@@ -43,8 +43,7 @@ void check_correctness(
                 s.impl_filter);
         if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
 
-        task_executor.submit(
-                prb, s.perf_template, createit, check_cacheit, doit);
+        task_executor.submit(prb, s.perf_template, createit, checkit, doit);
     }
 }
 
diff --git a/tests/benchdnn/sum/sum.cpp b/tests/benchdnn/sum/sum.cpp
index 0d4af206a19..4d14a96fa84 100644
--- a/tests/benchdnn/sum/sum.cpp
+++ b/tests/benchdnn/sum/sum.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -174,8 +174,7 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
     return OK;
 }
 
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     return check_caches(v_prim[0], prb, res);
 }
diff --git a/tests/benchdnn/sum/sum.hpp b/tests/benchdnn/sum/sum.hpp
index 926fa6f745b..5775e14db77 100644
--- a/tests/benchdnn/sum/sum.hpp
+++ b/tests/benchdnn/sum/sum.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -175,8 +175,7 @@ void compute_ref(const prb_t *prb, const args_t &args,
 
 int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
-int check_cacheit(
-        std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
+int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res);
diff --git a/tests/benchdnn/utils/task.hpp b/tests/benchdnn/utils/task.hpp
index 815b0c64900..e18e0fc3c90 100644
--- a/tests/benchdnn/utils/task.hpp
+++ b/tests/benchdnn/utils/task.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,15 +26,14 @@
 #include "utils/wrapper.hpp"
 
 template <typename prb_t, typename perf_report_t, typename create_func_t,
-        typename check_cache_func_t, typename do_func_t>
+        typename check_func_t, typename do_func_t>
 struct task_t {
     task_t(const prb_t &prb, const std::string &perf_template,
-            const create_func_t &create_func,
-            const check_cache_func_t &check_cache_func,
+            const create_func_t &create_func, const check_func_t &check_func,
             const do_func_t &do_func, int idx)
         : prb_(std::move(prb))
         , create_func_(create_func)
-        , check_cache_func_(check_cache_func)
+        , check_func_(check_func)
         , do_func_(do_func)
         , perf_template_(perf_template)
         , idx_(idx) {}
@@ -50,20 +49,27 @@ struct task_t {
         return OK;
     }
 
-    // Since task_t doesn't have a control over primitives, it has to pass this
-    // control to a driver which is aware of what primitives should be checked
-    // for being in the cache.
-    int check_cache() {
+    // Since `task_t` doesn't have control over primitives, it delegates the
+    // primitive-based checks to the driver.
+    int check() {
         if (!has_bench_mode_bit(mode_bit_t::corr)) return OK;
+        // No alive testing objects - no checks.
         if (res_.state != INITIALIZED) return OK;
 
-        return check_cache_func_(*v_prim_, &prb_, &res_);
+        return check_func_(*v_prim_, &prb_, &res_);
     }
 
     int exec() {
-        BENCHDNN_PRINT(1, "run: %s\n", prb_.str());
+        // Checking for the `INITIALIZED` state here prevents `SKIPPED`
+        // problems from being executed.
         if (res_.state == INITIALIZED && bench_mode != bench_mode_t::init) {
+            // Differentiate a message when the run happens...
+            BENCHDNN_PRINT(1, "run: %s\n", prb_.str());
             do_func_(*v_prim_, &prb_, &res_);
+        } else {
+            // ... versus when it didn't, while still indicating that the
+            // problem went through this part of the flow.
+            BENCHDNN_PRINT(1, "run (just report, no exec): %s\n", prb_.str());
         }
 
         return report();
@@ -72,7 +78,7 @@ struct task_t {
 private:
     prb_t prb_;
     create_func_t create_func_;
-    check_cache_func_t check_cache_func_;
+    check_func_t check_func_;
     do_func_t do_func_;
     std::string perf_template_;
     res_t res_ {};
diff --git a/tests/benchdnn/utils/task_executor.hpp b/tests/benchdnn/utils/task_executor.hpp
index fdd9ac3a84a..ddd19973ac8 100644
--- a/tests/benchdnn/utils/task_executor.hpp
+++ b/tests/benchdnn/utils/task_executor.hpp
@@ -26,30 +26,29 @@
     using create_func_t = std::function<int( \
             std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
             const prb_t *, res_t *)>; \
-    using check_cache_func_t = std::function<int( \
+    using check_func_t = std::function<int( \
             std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
             const prb_t *, res_t *)>; \
     using do_func_t = std::function<int( \
             const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &, \
             const prb_t *, res_t *)>; \
     using driver_task_executor_t = task_executor_t<prb_t, perf_report_t, \
-            create_func_t, check_cache_func_t, do_func_t>;
+            create_func_t, check_func_t, do_func_t>;
 
 extern int repeats_per_prb;
 
 template <typename prb_t, typename perf_report_t, typename create_func_t,
-        typename check_cache_func_t, typename do_func_t>
+        typename check_func_t, typename do_func_t>
 struct task_executor_t {
     virtual ~task_executor_t() { assert(tasks_.empty()); }
 
     void submit(const prb_t &prb, const std::string &perf_template,
-            const create_func_t &create_func,
-            const check_cache_func_t &check_cache_func,
+            const create_func_t &create_func, const check_func_t &check_func,
             const do_func_t &do_func) {
         static const int nthreads = benchdnn_get_max_threads();
         for (int r = 0; r < repeats_per_prb; r++) {
-            tasks_.emplace_back(prb, perf_template, create_func,
-                    check_cache_func, do_func, get_idx());
+            tasks_.emplace_back(prb, perf_template, create_func, check_func,
+                    do_func, get_idx());
             if (has_bench_mode_modifier(mode_modifier_t::par_create)
                     && static_cast<int>(tasks_.size()) < nthreads)
                 continue;
@@ -68,7 +67,7 @@ struct task_executor_t {
 
         // Check caches first to avoid filling cache with service reorders.
         for (auto &t : tasks_) {
-            t.check_cache();
+            t.check();
         }
 
         for (auto &t : tasks_) {
@@ -78,7 +77,7 @@ struct task_executor_t {
         tasks_.clear();
     }
 
-    std::vector<task_t<prb_t, perf_report_t, create_func_t, check_cache_func_t,
+    std::vector<task_t<prb_t, perf_report_t, create_func_t, check_func_t,
             do_func_t>>
             tasks_;
 

From e47a4a04ae1708b4fe4cee1b04d0288efa68b303 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 12 Feb 2025 10:30:41 -0800
Subject: [PATCH 11/18] benchdnn: move scope for checks inside checkit func

---
 tests/benchdnn/binary/binary.cpp         |  5 ++++-
 tests/benchdnn/bnorm/bnorm.cpp           |  6 ++++--
 tests/benchdnn/concat/concat.cpp         | 27 ++++++++++++++----------
 tests/benchdnn/conv/conv.cpp             |  6 ++++--
 tests/benchdnn/deconv/deconv.cpp         |  6 ++++--
 tests/benchdnn/eltwise/eltwise.cpp       |  6 ++++--
 tests/benchdnn/gnorm/gnorm.cpp           |  4 +++-
 tests/benchdnn/ip/ip.cpp                 |  6 ++++--
 tests/benchdnn/lnorm/lnorm.cpp           |  5 ++++-
 tests/benchdnn/lrn/lrn.cpp               |  6 ++++--
 tests/benchdnn/matmul/matmul.cpp         |  6 ++++--
 tests/benchdnn/pool/pool.cpp             |  6 ++++--
 tests/benchdnn/prelu/prelu.cpp           |  5 ++++-
 tests/benchdnn/reduction/reduction.cpp   |  5 ++++-
 tests/benchdnn/reorder/reorder.cpp       |  5 ++++-
 tests/benchdnn/resampling/resampling.cpp |  5 ++++-
 tests/benchdnn/rnn/rnn.cpp               |  6 ++++--
 tests/benchdnn/shuffle/shuffle.cpp       |  5 ++++-
 tests/benchdnn/softmax/softmax.cpp       |  5 ++++-
 tests/benchdnn/sum/sum.cpp               |  5 ++++-
 tests/benchdnn/utils/task.hpp            |  1 -
 21 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/tests/benchdnn/binary/binary.cpp b/tests/benchdnn/binary/binary.cpp
index d559e91cfca..5b3f8d8c96f 100644
--- a/tests/benchdnn/binary/binary.cpp
+++ b/tests/benchdnn/binary/binary.cpp
@@ -297,7 +297,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/bnorm/bnorm.cpp b/tests/benchdnn/bnorm/bnorm.cpp
index 32996c0453f..f6e659ca3b3 100644
--- a/tests/benchdnn/bnorm/bnorm.cpp
+++ b/tests/benchdnn/bnorm/bnorm.cpp
@@ -720,8 +720,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/concat/concat.cpp b/tests/benchdnn/concat/concat.cpp
index 0e3a8ecdc82..b4020e635ce 100644
--- a/tests/benchdnn/concat/concat.cpp
+++ b/tests/benchdnn/concat/concat.cpp
@@ -198,18 +198,23 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    // Assume it doesn't change through the execution.
-    static int capacity = 0;
-    static auto st = dnnl_get_primitive_cache_capacity(&capacity);
-    if (st != dnnl_success) return FAIL;
-    if (capacity > 0 && prb->n_inputs() + 1 > capacity) {
-        BENCHDNN_PRINT(2, "%s\n",
-                "[INFO] The number of potential internal reorder pds plus "
-                "concat itself exceeds the cache capacity which will lead to a "
-                "test case false-positive failure.");
-        return OK;
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        // The assumption is the capacity doesn't change through the execution.
+        static int capacity = 0;
+        static auto st = dnnl_get_primitive_cache_capacity(&capacity);
+        if (st != dnnl_success) return FAIL;
+
+        if (capacity > 0 && prb->n_inputs() + 1 > capacity) {
+            BENCHDNN_PRINT(2, "%s\n",
+                    "[INFO] The number of potential internal reorder pds plus "
+                    "concat itself exceeds the cache capacity which will lead "
+                    "to a test case false-positive failure.");
+            return OK;
+        }
+
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
-    return check_caches(v_prim[0], prb, res);
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp
index 33f22398469..1407431be62 100644
--- a/tests/benchdnn/conv/conv.cpp
+++ b/tests/benchdnn/conv/conv.cpp
@@ -579,8 +579,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    // Don't check caches for CPU prim as the reference.
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        // Don't check caches for CPU prim as the reference.
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp
index 8e76d1ff6b6..c5f47f8b914 100644
--- a/tests/benchdnn/deconv/deconv.cpp
+++ b/tests/benchdnn/deconv/deconv.cpp
@@ -556,8 +556,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    // Don't check caches for CPU prim as the reference.
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        // Don't check caches for CPU prim as the reference.
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/eltwise/eltwise.cpp b/tests/benchdnn/eltwise/eltwise.cpp
index fdc8edd83c0..a287c779d9b 100644
--- a/tests/benchdnn/eltwise/eltwise.cpp
+++ b/tests/benchdnn/eltwise/eltwise.cpp
@@ -500,8 +500,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/gnorm/gnorm.cpp b/tests/benchdnn/gnorm/gnorm.cpp
index 9b86b4f1a5c..828e822d0ce 100644
--- a/tests/benchdnn/gnorm/gnorm.cpp
+++ b/tests/benchdnn/gnorm/gnorm.cpp
@@ -705,7 +705,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/ip/ip.cpp b/tests/benchdnn/ip/ip.cpp
index b5a2db3087b..1594aa5b6c4 100644
--- a/tests/benchdnn/ip/ip.cpp
+++ b/tests/benchdnn/ip/ip.cpp
@@ -410,8 +410,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    // Don't check caches for CPU prim as the reference.
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        // Don't check caches for CPU prim as the reference.
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/lnorm/lnorm.cpp b/tests/benchdnn/lnorm/lnorm.cpp
index 1d6a01997c8..30397a872f3 100644
--- a/tests/benchdnn/lnorm/lnorm.cpp
+++ b/tests/benchdnn/lnorm/lnorm.cpp
@@ -665,7 +665,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/lrn/lrn.cpp b/tests/benchdnn/lrn/lrn.cpp
index 5f037b49729..7346f68d785 100644
--- a/tests/benchdnn/lrn/lrn.cpp
+++ b/tests/benchdnn/lrn/lrn.cpp
@@ -209,8 +209,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/matmul/matmul.cpp b/tests/benchdnn/matmul/matmul.cpp
index bd7d947bf44..269d956d775 100644
--- a/tests/benchdnn/matmul/matmul.cpp
+++ b/tests/benchdnn/matmul/matmul.cpp
@@ -884,8 +884,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    // Don't check caches for CPU prim as the reference.
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        // Don't check caches for CPU prim as the reference.
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/pool/pool.cpp b/tests/benchdnn/pool/pool.cpp
index 6ab707c498f..a807a90c644 100644
--- a/tests/benchdnn/pool/pool.cpp
+++ b/tests/benchdnn/pool/pool.cpp
@@ -343,8 +343,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/prelu/prelu.cpp b/tests/benchdnn/prelu/prelu.cpp
index a53054dc9a8..b838856c1b7 100644
--- a/tests/benchdnn/prelu/prelu.cpp
+++ b/tests/benchdnn/prelu/prelu.cpp
@@ -238,7 +238,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/reduction/reduction.cpp b/tests/benchdnn/reduction/reduction.cpp
index ed4e9487e41..b765fd48c26 100644
--- a/tests/benchdnn/reduction/reduction.cpp
+++ b/tests/benchdnn/reduction/reduction.cpp
@@ -324,7 +324,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/reorder/reorder.cpp b/tests/benchdnn/reorder/reorder.cpp
index 33f3c3e62be..b42d5ab98d4 100644
--- a/tests/benchdnn/reorder/reorder.cpp
+++ b/tests/benchdnn/reorder/reorder.cpp
@@ -490,7 +490,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/resampling/resampling.cpp b/tests/benchdnn/resampling/resampling.cpp
index 690d01569c1..8a0181162e2 100644
--- a/tests/benchdnn/resampling/resampling.cpp
+++ b/tests/benchdnn/resampling/resampling.cpp
@@ -251,7 +251,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp
index ea6fc985b8d..43b037a5f44 100644
--- a/tests/benchdnn/rnn/rnn.cpp
+++ b/tests/benchdnn/rnn/rnn.cpp
@@ -1253,8 +1253,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    SAFE(check_caches(v_prim[0], prb, res), WARN);
-    if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+        if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
+    }
     return OK;
 }
 
diff --git a/tests/benchdnn/shuffle/shuffle.cpp b/tests/benchdnn/shuffle/shuffle.cpp
index bb24a565eb6..a6cb33cb5d9 100644
--- a/tests/benchdnn/shuffle/shuffle.cpp
+++ b/tests/benchdnn/shuffle/shuffle.cpp
@@ -183,7 +183,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/softmax/softmax.cpp b/tests/benchdnn/softmax/softmax.cpp
index 2deae687717..13a5dfa3fe4 100644
--- a/tests/benchdnn/softmax/softmax.cpp
+++ b/tests/benchdnn/softmax/softmax.cpp
@@ -415,7 +415,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/sum/sum.cpp b/tests/benchdnn/sum/sum.cpp
index 4d14a96fa84..e5f1e31a79f 100644
--- a/tests/benchdnn/sum/sum.cpp
+++ b/tests/benchdnn/sum/sum.cpp
@@ -176,7 +176,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
-    return check_caches(v_prim[0], prb, res);
+    if (has_bench_mode_bit(mode_bit_t::corr)) {
+        SAFE(check_caches(v_prim[0], prb, res), WARN);
+    }
+    return OK;
 }
 
 int doit(const std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
diff --git a/tests/benchdnn/utils/task.hpp b/tests/benchdnn/utils/task.hpp
index e18e0fc3c90..7c1a8654e29 100644
--- a/tests/benchdnn/utils/task.hpp
+++ b/tests/benchdnn/utils/task.hpp
@@ -52,7 +52,6 @@ struct task_t {
     // Since `task_t` doesn't have control over primitives, it delegates the
     // primitive-based checks to the driver.
     int check() {
-        if (!has_bench_mode_bit(mode_bit_t::corr)) return OK;
         // No alive testing objects - no checks.
         if (res_.state != INITIALIZED) return OK;
 

From 4c9b36bf2f7ad9f065d519a434f784a80ee941de Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 12 Feb 2025 10:54:49 -0800
Subject: [PATCH 12/18] benchdnn: move memory check into a dedicated check func

---
 tests/benchdnn/binary/binary.cpp         |  3 +++
 tests/benchdnn/bnorm/bnorm.cpp           |  4 ++++
 tests/benchdnn/concat/concat.cpp         |  3 +++
 tests/benchdnn/conv/conv.cpp             |  5 +++++
 tests/benchdnn/deconv/deconv.cpp         |  5 +++++
 tests/benchdnn/dnnl_common.cpp           |  5 ++---
 tests/benchdnn/dnnl_common.hpp           | 27 +++++++++++++++++++++++-
 tests/benchdnn/eltwise/eltwise.cpp       |  4 ++++
 tests/benchdnn/gnorm/gnorm.cpp           |  3 +++
 tests/benchdnn/ip/ip.cpp                 |  5 +++++
 tests/benchdnn/lnorm/lnorm.cpp           |  3 +++
 tests/benchdnn/lrn/lrn.cpp               |  4 ++++
 tests/benchdnn/matmul/matmul.cpp         |  5 +++++
 tests/benchdnn/pool/pool.cpp             |  4 ++++
 tests/benchdnn/prelu/prelu.cpp           |  3 +++
 tests/benchdnn/reduction/reduction.cpp   |  3 +++
 tests/benchdnn/reorder/reorder.cpp       |  3 +++
 tests/benchdnn/resampling/resampling.cpp |  3 +++
 tests/benchdnn/rnn/rnn.cpp               |  4 ++++
 tests/benchdnn/shuffle/shuffle.cpp       |  3 +++
 tests/benchdnn/softmax/softmax.cpp       |  3 +++
 tests/benchdnn/sum/sum.cpp               |  3 +++
 tests/benchdnn/utils/dnnl_query.cpp      |  8 ++++++-
 tests/benchdnn/utils/dnnl_query.hpp      |  3 ++-
 24 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/tests/benchdnn/binary/binary.cpp b/tests/benchdnn/binary/binary.cpp
index 5b3f8d8c96f..3927f24fea6 100644
--- a/tests/benchdnn/binary/binary.cpp
+++ b/tests/benchdnn/binary/binary.cpp
@@ -297,6 +297,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/bnorm/bnorm.cpp b/tests/benchdnn/bnorm/bnorm.cpp
index f6e659ca3b3..b51df6fa5a8 100644
--- a/tests/benchdnn/bnorm/bnorm.cpp
+++ b/tests/benchdnn/bnorm/bnorm.cpp
@@ -720,6 +720,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        if (v_prim[1]) SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/concat/concat.cpp b/tests/benchdnn/concat/concat.cpp
index b4020e635ce..2e3942c9379 100644
--- a/tests/benchdnn/concat/concat.cpp
+++ b/tests/benchdnn/concat/concat.cpp
@@ -198,6 +198,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         // The assumtion is the capacity doesn't change through the execution.
         static int capacity = 0;
diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp
index 1407431be62..ef0f25072bf 100644
--- a/tests/benchdnn/conv/conv.cpp
+++ b/tests/benchdnn/conv/conv.cpp
@@ -579,6 +579,11 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        // Don't check total size for CPU prim as the reference - it needs a
+        // special handling to combine both primitive memory requirements.
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp
index c5f47f8b914..9d145977114 100644
--- a/tests/benchdnn/deconv/deconv.cpp
+++ b/tests/benchdnn/deconv/deconv.cpp
@@ -556,6 +556,11 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        // Don't check total size for CPU prim as the reference - it needs a
+        // special handling to combine both primitive memory requirements.
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 0b1f2ed803b..ab4da891dde 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1105,7 +1105,7 @@ std::string smart_bytes(double bytes) {
     return s;
 }
 
-static int check_total_size(res_t *res) {
+int check_total_size(res_t *res) {
     static size_t cpu_device_capacity = get_cpu_ram_size();
     static size_t gpu_device_capacity = 0;
     static size_t gpu_max_alloc_capacity = 0;
@@ -1395,8 +1395,7 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     // Copy memory stats. It's required to accumulate them before performing
     // the check.
     res->mem_size_args = check_mem_size_args;
-
-    return check_total_size(res);
+    return OK;
 }
 
 int get_memory_footprint(const_dnnl_primitive_desc_t const_pd, res_t *res) {
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index 10df7e94582..72e2f898638 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -236,6 +236,7 @@ int get_cpu_cache_size(cpu_cache_args_t &cache_args);
 int get_gpu_cache_size(size_t &cache_size);
 
 std::string smart_bytes(double bytes);
+int check_total_size(res_t *res);
 bool is_fwd_training(dnnl_prop_kind_t prop_kind);
 bool is_fwd_prop_kind(dnnl_prop_kind_t prop_kind);
 int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);
@@ -419,7 +420,31 @@ int create_primitive(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &primw,
     //   Since the mem size check for all the operations are necessary,
     //   the check wouldn't be skipped.
     SAFE(check_mem_size(pdw, res, dir, /* need_skip = */ !is_graph_ref), WARN);
-    if (res->state == SKIPPED) return OK;
+
+    // The library scratchpad is allocated at create_primitive stage. The memory
+    // check is moved after the creation stage. It's necessary to check the
+    // library scratchpad size against gpu_max_alloc, otherwise, out_of_memory
+    // would be issued by the library.
+    if (res->mem_size_args.scratchpad_size > 0 && is_gpu()
+            && query_scratchpad_mode(query_attr(pdw))
+                    == dnnl_scratchpad_mode_library) {
+        static size_t gpu_device_capacity = 0;
+        static size_t gpu_max_alloc_capacity = 0;
+        SAFE(get_gpu_ram_sizes(gpu_device_capacity, gpu_max_alloc_capacity),
+                WARN);
+        const bool fit
+                = res->mem_size_args.scratchpad_size < gpu_max_alloc_capacity;
+        if (!fit) {
+            BENCHDNN_PRINT(1,
+                    "[CHECK_MEM]: Size of the scratchpad %s "
+                    "doesn't fit the allocation limit of %s.\n",
+                    smart_bytes(res->mem_size_args.scratchpad_size).c_str(),
+                    smart_bytes(gpu_max_alloc_capacity).c_str());
+            res->state = SKIPPED;
+            res->reason = skip_reason::not_enough_ram;
+            return OK;
+        }
+    }
 
     TIME_C_PRIM(DNN_SAFE(dnnl_primitive_create(&prim, pdw), WARN));
     primw.reset(prim);
diff --git a/tests/benchdnn/eltwise/eltwise.cpp b/tests/benchdnn/eltwise/eltwise.cpp
index a287c779d9b..a80dfd012e3 100644
--- a/tests/benchdnn/eltwise/eltwise.cpp
+++ b/tests/benchdnn/eltwise/eltwise.cpp
@@ -500,6 +500,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        if (v_prim[1]) SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/gnorm/gnorm.cpp b/tests/benchdnn/gnorm/gnorm.cpp
index 828e822d0ce..2584579657a 100644
--- a/tests/benchdnn/gnorm/gnorm.cpp
+++ b/tests/benchdnn/gnorm/gnorm.cpp
@@ -705,6 +705,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/ip/ip.cpp b/tests/benchdnn/ip/ip.cpp
index 1594aa5b6c4..7afe6221b99 100644
--- a/tests/benchdnn/ip/ip.cpp
+++ b/tests/benchdnn/ip/ip.cpp
@@ -410,6 +410,11 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        // Don't check total size for CPU prim as the reference - it needs a
+        // special handling to combine both primitive memory requirements.
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/lnorm/lnorm.cpp b/tests/benchdnn/lnorm/lnorm.cpp
index 30397a872f3..e6301c1cb7a 100644
--- a/tests/benchdnn/lnorm/lnorm.cpp
+++ b/tests/benchdnn/lnorm/lnorm.cpp
@@ -665,6 +665,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/lrn/lrn.cpp b/tests/benchdnn/lrn/lrn.cpp
index 7346f68d785..2cd1a695109 100644
--- a/tests/benchdnn/lrn/lrn.cpp
+++ b/tests/benchdnn/lrn/lrn.cpp
@@ -209,6 +209,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        if (v_prim[1]) SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/matmul/matmul.cpp b/tests/benchdnn/matmul/matmul.cpp
index 269d956d775..682908bab3e 100644
--- a/tests/benchdnn/matmul/matmul.cpp
+++ b/tests/benchdnn/matmul/matmul.cpp
@@ -884,6 +884,11 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        // Don't check total size for CPU prim as the reference - it needs a
+        // special handling to combine both primitive memory requirements.
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         // Don't check caches for CPU prim as the reference.
diff --git a/tests/benchdnn/pool/pool.cpp b/tests/benchdnn/pool/pool.cpp
index a807a90c644..cff1a3510f9 100644
--- a/tests/benchdnn/pool/pool.cpp
+++ b/tests/benchdnn/pool/pool.cpp
@@ -343,6 +343,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        if (v_prim[1]) SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/prelu/prelu.cpp b/tests/benchdnn/prelu/prelu.cpp
index b838856c1b7..1e39804cdd9 100644
--- a/tests/benchdnn/prelu/prelu.cpp
+++ b/tests/benchdnn/prelu/prelu.cpp
@@ -238,6 +238,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/reduction/reduction.cpp b/tests/benchdnn/reduction/reduction.cpp
index b765fd48c26..a0958ad530c 100644
--- a/tests/benchdnn/reduction/reduction.cpp
+++ b/tests/benchdnn/reduction/reduction.cpp
@@ -324,6 +324,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/reorder/reorder.cpp b/tests/benchdnn/reorder/reorder.cpp
index b42d5ab98d4..97cc6825830 100644
--- a/tests/benchdnn/reorder/reorder.cpp
+++ b/tests/benchdnn/reorder/reorder.cpp
@@ -490,6 +490,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/resampling/resampling.cpp b/tests/benchdnn/resampling/resampling.cpp
index 8a0181162e2..71f5746e300 100644
--- a/tests/benchdnn/resampling/resampling.cpp
+++ b/tests/benchdnn/resampling/resampling.cpp
@@ -251,6 +251,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp
index 43b037a5f44..52df85b9909 100644
--- a/tests/benchdnn/rnn/rnn.cpp
+++ b/tests/benchdnn/rnn/rnn.cpp
@@ -1253,6 +1253,10 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+        if (v_prim[1]) SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
         if (v_prim[1]) { SAFE(check_caches(v_prim[1], prb, res), WARN); }
diff --git a/tests/benchdnn/shuffle/shuffle.cpp b/tests/benchdnn/shuffle/shuffle.cpp
index a6cb33cb5d9..e9ab1ca7cd5 100644
--- a/tests/benchdnn/shuffle/shuffle.cpp
+++ b/tests/benchdnn/shuffle/shuffle.cpp
@@ -183,6 +183,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/softmax/softmax.cpp b/tests/benchdnn/softmax/softmax.cpp
index 13a5dfa3fe4..ecc94d5f17c 100644
--- a/tests/benchdnn/softmax/softmax.cpp
+++ b/tests/benchdnn/softmax/softmax.cpp
@@ -415,6 +415,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/sum/sum.cpp b/tests/benchdnn/sum/sum.cpp
index e5f1e31a79f..33ed1ac2c1f 100644
--- a/tests/benchdnn/sum/sum.cpp
+++ b/tests/benchdnn/sum/sum.cpp
@@ -176,6 +176,9 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
+    if (has_bench_mode_bit(mode_bit_t::exec)) {
+        SAFE(check_total_size(res), WARN);
+    }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
     }
diff --git a/tests/benchdnn/utils/dnnl_query.cpp b/tests/benchdnn/utils/dnnl_query.cpp
index db39f30dd45..dfcf7aed8a3 100644
--- a/tests/benchdnn/utils/dnnl_query.cpp
+++ b/tests/benchdnn/utils/dnnl_query.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2022-2024 Intel Corporation
+* Copyright 2022-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -99,6 +99,12 @@ bool query_post_ops_has_kind(
     return false;
 }
 
+dnnl_scratchpad_mode_t query_scratchpad_mode(const_dnnl_primitive_attr_t attr) {
+    dnnl_scratchpad_mode_t mode = dnnl_scratchpad_mode_library;
+    dnnl_primitive_attr_get_scratchpad_mode(attr, &mode);
+    return mode;
+}
+
 const_dnnl_post_ops_t query_post_ops(const_dnnl_primitive_attr_t attr) {
     const_dnnl_post_ops_t post_ops {};
     dnnl_primitive_attr_get_post_ops(attr, &post_ops);
diff --git a/tests/benchdnn/utils/dnnl_query.hpp b/tests/benchdnn/utils/dnnl_query.hpp
index 5470a495dde..39afc78004c 100644
--- a/tests/benchdnn/utils/dnnl_query.hpp
+++ b/tests/benchdnn/utils/dnnl_query.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2022-2024 Intel Corporation
+* Copyright 2022-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@ int query_n_outputs(const_dnnl_primitive_desc_t pd);
 bool query_post_ops_has_kind(dnnl_primitive_t prim, dnnl_primitive_kind_t kind);
 bool query_post_ops_has_kind(
         const_dnnl_post_ops_t post_ops, dnnl_primitive_kind_t kind);
+dnnl_scratchpad_mode_t query_scratchpad_mode(const_dnnl_primitive_attr_t attr);
 const_dnnl_post_ops_t query_post_ops(const_dnnl_primitive_attr_t attr);
 const_dnnl_post_ops_t query_post_ops(const_dnnl_primitive_desc_t pd);
 const_dnnl_primitive_attr_t query_attr(const_dnnl_primitive_desc_t pd);

From d357763ed29c331c2e6302b1089985312a300459 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Thu, 6 Feb 2025 15:34:35 -0800
Subject: [PATCH 13/18] benchdnn: rename check_mem_size into collect and change
 args

---
 tests/benchdnn/dnnl_common.cpp | 10 +++++-----
 tests/benchdnn/dnnl_common.hpp | 15 +++++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index ab4da891dde..ed79d1ba47a 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1351,8 +1351,8 @@ int check_mem_size(const_dnnl_memory_desc_t md, res_t *res) {
     return check_total_size(res);
 }
 
-int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
-        bool need_skip) {
+int collect_mem_size(check_mem_size_args_t &mem_size_args,
+        const_dnnl_primitive_desc_t const_pd, dir_t dir, bool need_skip) {
     // Skip the check if it is disabled.
     if (!mem_check) return OK;
 
@@ -1365,7 +1365,7 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
     // test objects when a double-run driver executes the fwd-for-bwd object
     // first and the bwd object after.
     // ANCHOR: MEM_CHECK_ARGS_DIR;
-    if (need_skip && res->mem_size_args.dir == dir) return OK;
+    if (need_skip && mem_size_args.dir == dir) return OK;
 
     // Get input sizes.
     check_mem_size_args_t check_mem_size_args(
@@ -1394,7 +1394,7 @@ int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
 
     // Copy memory stats. It's required to accumulate them before performing
     // the check.
-    res->mem_size_args = check_mem_size_args;
+    mem_size_args = check_mem_size_args;
     return OK;
 }
 
@@ -1407,7 +1407,7 @@ int get_memory_footprint(const_dnnl_primitive_desc_t const_pd, res_t *res) {
     get_memory_bytes(check_mem_out_size_args); // Get output bytes.
 
     // Sum post-ops include dst bytes as an input. Not included in get_memory_bytes
-    // since it would cause check_mem_size to double-count dst bytes.
+    // since it would cause `collect_mem_size` to double-count dst bytes.
     auto const_attr_po = query_post_ops(const_pd);
     auto po_len = dnnl_post_ops_len(const_attr_po);
     for (int idx = 0; idx < po_len; ++idx) {
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index 72e2f898638..2ab42f36f25 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -243,9 +243,12 @@ int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);
 int check_same_pd(const dnnl_primitive_desc_t &pd_no_attr, res_t *res);
 int test_persistent_cache_api(
         benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim, res_t *res);
+// Used by zeropad only. Unlike `collect_mem_size`, it also runs the check.
 int check_mem_size(const_dnnl_memory_desc_t md, res_t *res);
-int check_mem_size(const_dnnl_primitive_desc_t const_pd, res_t *res, dir_t dir,
-        bool need_skip = true);
+// Only collects memory sizes from an input `const_pd` and puts the result into
+// `mem_size_args`.
+int collect_mem_size(check_mem_size_args_t &mem_size_args,
+        const_dnnl_primitive_desc_t const_pd, dir_t dir, bool need_skip = true);
 
 inline bool should_stop(const timer::timer_t &t) {
     const bool stop = false
@@ -415,11 +418,11 @@ int create_primitive(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &primw,
             WARN);
     if (res->state == SKIPPED) return OK;
 
-    // Check memory requirements if only execution happens.
     // Note: Graph may contain more than one operation with identical `dir`.
-    //   Since the mem size check for all the operations are necessary,
-    //   the check wouldn't be skipped.
-    SAFE(check_mem_size(pdw, res, dir, /* need_skip = */ !is_graph_ref), WARN);
+    //   It's required to collect all memory sizes regardless of `dir`.
+    SAFE(collect_mem_size(res->mem_size_args, pdw, dir,
+                 /* need_skip = */ !is_graph_ref),
+            WARN);
 
     // The library scratchpad is allocated at create_primitive stage. The memory
     // check is moved after the creation stage. It's necessary to check the

From 773569589dc6657ef31c406e8f00b33fc7287619 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Thu, 6 Feb 2025 17:30:20 -0800
Subject: [PATCH 14/18] benchdnn: utils: res: check_mem_args: split cpu memory
 into pieces

---
 tests/benchdnn/dnnl_common.cpp         | 17 ++++++++++-------
 tests/benchdnn/graph/ref_partition.cpp |  6 ++++--
 tests/benchdnn/utils/res.hpp           | 19 +++++++++++++------
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index ed79d1ba47a..6ab7e70792d 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -1166,13 +1166,16 @@ int check_total_size(res_t *res) {
                 smart_bytes(gpu_max_alloc_capacity).c_str());
     }
 
+    size_t total_size_cpu = check_mem_size_args.total_size_ref
+            + check_mem_size_args.total_size_compare
+            + check_mem_size_args.total_size_mapped;
     // If the problem runs on CPU, the combined memory represents requirements
     // for the library and for the reference paths.
     // If the problem runs on a device, the combined memory represents potential
     // requirement for integrated devices that use CPU pool for both memories.
     // The second case has higher limit because TODO:<the_reason>.
-    size_t cpu_and_device_size = check_mem_size_args.total_size_cpu
-            + check_mem_size_args.total_size_device;
+    size_t cpu_and_device_size
+            = total_size_cpu + check_mem_size_args.total_size_device;
     bool fits_cpu_ram = cpu_and_device_size
             <= (is_cpu() ? benchdnn_cpu_limit : cpu_device_capacity);
 
@@ -1227,7 +1230,7 @@ int check_total_size(res_t *res) {
     BENCHDNN_PRINT((!fits_cpu_ram ? 1 : 6),
             "[CHECK_MEM][%s]: Requested: %s%s (Service), %s (combined);\n",
             dir_c_str(), total_size_device_str.c_str(),
-            smart_bytes(check_mem_size_args.total_size_cpu).c_str(),
+            smart_bytes(total_size_cpu).c_str(),
             smart_bytes(cpu_and_device_size).c_str());
 
     return res->state == FAILED ? FAIL : OK;
@@ -1256,7 +1259,7 @@ void add_md_size(const_dnnl_memory_desc_t md,
             && !has_bench_mode_modifier(mode_modifier_t::no_ref_memory);
 
     // Mapped memory for GPU backend on CPU.
-    check_mem_size_args.total_size_cpu += mapped_mem_factor * mem_size;
+    check_mem_size_args.total_size_mapped += mapped_mem_factor * mem_size;
 
     const bool is_corr = has_bench_mode_bit(mode_bit_t::corr);
     const bool is_bitwise = has_bench_mode_bit(mode_bit_t::bitwise);
@@ -1270,19 +1273,19 @@ void add_md_size(const_dnnl_memory_desc_t md,
     check_mem_size_args.total_ref_md_size[ref_mem_idx] = ref_md_size;
 
     // A memory copy for ref_compute, happens only in correctness.
-    check_mem_size_args.total_size_cpu += is_corr * ref_md_size;
+    check_mem_size_args.total_size_ref += is_corr * ref_md_size;
 
     // Comparison function allocates an additional tag::abx f32 memory.
     // This allocation holds for correctness and bitwise modes.
     const bool compare_mem_factor
             = !check_mem_size_args.want_input && (is_corr || is_bitwise);
-    check_mem_size_args.total_size_cpu += compare_mem_factor * ref_md_size;
+    check_mem_size_args.total_size_compare += compare_mem_factor * ref_md_size;
 
     // Bitwise comparison allocates an additional tag::abx f32 memory from
     // the first run to compare results against it.
     const bool bitwise_compare_mem_factor
             = !check_mem_size_args.want_input && is_bitwise;
-    check_mem_size_args.total_size_cpu
+    check_mem_size_args.total_size_compare
             += bitwise_compare_mem_factor * ref_md_size;
 }
 
diff --git a/tests/benchdnn/graph/ref_partition.cpp b/tests/benchdnn/graph/ref_partition.cpp
index 58774795ba7..306012ff97b 100644
--- a/tests/benchdnn/graph/ref_partition.cpp
+++ b/tests/benchdnn/graph/ref_partition.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -532,7 +532,9 @@ int ref_partition_t::check_partition_total_size(
     // after reference path data filling(`C` mode only)
     // 3. Memory to be allocated for comparing results(`C` mode only)
     // 4. Memory to be allocated for mapping device memory(GPU backend only)
-    size_t new_cpu_req = check_mem_size_args.total_size_cpu;
+    size_t new_cpu_req = check_mem_size_args.total_size_ref
+            + check_mem_size_args.total_size_compare
+            + check_mem_size_args.total_size_mapped;
     size_t new_gpu_req = check_mem_size_args.total_size_device;
 
     // STEP 1: Memory allocation stage for the reference path
diff --git a/tests/benchdnn/utils/res.hpp b/tests/benchdnn/utils/res.hpp
index fa728a4958b..15908b406e7 100644
--- a/tests/benchdnn/utils/res.hpp
+++ b/tests/benchdnn/utils/res.hpp
@@ -70,13 +70,20 @@ struct check_mem_size_args_t {
     // `sizes` used to validate OpenCL memory requirements.
     std::vector<size_t> sizes;
     // `total_size_device` specifies memory allocated on device for a test obj.
+    // It's an accumulated result of `sizes` values.
     size_t total_size_device = 0;
-    // `total_size_cpu` specifies:
-    // * Memory allocated for reference ocmputations (`C` mode only).
-    // * Memory allocated for comparison results (`C` mode only).
-    // * Memory allocated for mapping device memory (GPU backend only).
-    // * Memory allocated on CPU for a test obj (CPU backend only).
-    size_t total_size_cpu = 0;
+    // `total_size_ref` specifies memory allocated for reference computations
+    // (`C` mode only). This value represents either the memory needed for a
+    // naive reference implementation on plain formats, or the memory needed
+    // for a prim_ref (--fast-ref) test object, which can utilize blocked
+    // formats.
+    size_t total_size_ref = 0;
+    // `total_size_compare` specifies memory allocated for comparison results
+    // tensor (`C` mode only).
+    size_t total_size_compare = 0;
+    // `total_size_mapped` specifies memory allocated for mapped buffers on the
+    // host (GPU backend only).
+    size_t total_size_mapped = 0;
     // `total_ref_md_size` specifies the additional tag::abx f32 memory
     // required for correctness check.
     // * The first element refers to the total memory for input reference

From c32b05742396b42a80864df28594a6669ad2cff8 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Mon, 10 Feb 2025 16:10:27 -0800
Subject: [PATCH 15/18] benchdnn: consolidate a common part of init_prim_ref
 function

---
 tests/benchdnn/conv/conv.cpp     | 31 +++---------------------------
 tests/benchdnn/deconv/deconv.cpp | 31 +++---------------------------
 tests/benchdnn/dnnl_common.hpp   | 33 ++++++++++++++++++++++++++++++++
 tests/benchdnn/ip/ip.cpp         | 31 +++---------------------------
 tests/benchdnn/matmul/matmul.cpp | 31 +++---------------------------
 5 files changed, 45 insertions(+), 112 deletions(-)

diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp
index ef0f25072bf..49b5e19fbf5 100644
--- a/tests/benchdnn/conv/conv.cpp
+++ b/tests/benchdnn/conv/conv.cpp
@@ -333,7 +333,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
         prim_ref_dt.erase(prim_ref_dt.begin());
         prim_ref_bia_dt.erase(prim_ref_bia_dt.begin());
     }
-    dnnl_primitive_t prim_ref_ {};
 
     for_(const auto &prim_ref_dt_i : prim_ref_dt)
     for (const auto &prim_ref_bia_dt_i : prim_ref_bia_dt) {
@@ -342,35 +341,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
                 prb->mb, cpu_attr, prb->ctx_init, prb->ctx_exe,
                 prb->impl_filter};
 
-        init_pd_args_t<prb_t> init_pd_args(
-                /* res = */ nullptr, get_cpu_engine(), &prb_cpu, prb->dir,
-                /* hint = */ nullptr, /* src_md = */ nullptr);
-        init_pd(init_pd_args);
-
-        benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pdw;
-        // `is_service_prim=true` prevents from filtering the implementation
-        // by name which is intended through a `get_prim_ref_impl_filter()`.
-        // As `fetch_impl` doesn't have any further logic related to it, it's
-        // safe to set it to `false`.
-        fetch_impl(pdw, init_pd_args, get_prim_ref_impl_filter(),
-                /* res = */ nullptr,
-                /* is_service_prim = */ false);
-
-        // Prim desc wasn't created - try the next set...
-        if (!pdw) continue;
-
-        auto st = dnnl_primitive_create(&prim_ref_, pdw);
-        // Primitive wasn't created - try the next set...
-        if (st != dnnl_success) continue;
-
-        BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
-                query_impl_info(pdw).c_str());
-        res->prim_ref_repro = prb_cpu.str();
-        prim_ref.reset(prim_ref_);
-        return OK;
+        auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
+        if (st == OK) return OK;
     }
 
-    prim_ref.reset(prim_ref_);
+    prim_ref.reset(nullptr);
     return OK;
 }
 
diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp
index 9d145977114..25e43009cf0 100644
--- a/tests/benchdnn/deconv/deconv.cpp
+++ b/tests/benchdnn/deconv/deconv.cpp
@@ -322,7 +322,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
         prim_ref_dt.erase(prim_ref_dt.begin());
         prim_ref_bia_dt.erase(prim_ref_bia_dt.begin());
     }
-    dnnl_primitive_t prim_ref_ {};
 
     for_(const auto &prim_ref_dt_i : prim_ref_dt)
     for (const auto &prim_ref_bia_dt_i : prim_ref_bia_dt) {
@@ -330,35 +329,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
                 tag::any, tag::any, tag::any, DIRECT, prb->mb, cpu_attr,
                 prb->ctx_init, prb->ctx_exe, prb->impl_filter};
 
-        init_pd_args_t<prb_t> init_pd_args(
-                /* res = */ nullptr, get_cpu_engine(), &prb_cpu, prb->dir,
-                /* hint = */ nullptr, /* src_md = */ nullptr);
-        init_pd(init_pd_args);
-
-        benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pdw;
-        // `is_service_prim=true` prevents from filtering the implementation
-        // by name which is intended through a `get_prim_ref_impl_filter()`.
-        // As `fetch_impl` doesn't have any further logic related to it, it's
-        // safe to set it to `false`.
-        fetch_impl(pdw, init_pd_args, get_prim_ref_impl_filter(),
-                /* res = */ nullptr,
-                /* is_service_prim = */ false);
-
-        // Prim desc wasn't created - try the next set...
-        if (!pdw) continue;
-
-        auto st = dnnl_primitive_create(&prim_ref_, pdw);
-        // Primitive wasn't created - try the next set...
-        if (st != dnnl_success) continue;
-
-        BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
-                query_impl_info(pdw).c_str());
-        res->prim_ref_repro = prb_cpu.str();
-        prim_ref.reset(prim_ref_);
-        return OK;
+        auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
+        if (st == OK) return OK;
     }
 
-    prim_ref.reset(prim_ref_);
+    prim_ref.reset(nullptr);
     return OK;
 }
 
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index 2ab42f36f25..e974fb4b50e 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -1058,4 +1058,37 @@ int init_ref_memory_args_default_case(int exec_arg, dnn_mem_t &mem,
 int check_bitwise(dnnl_primitive_t prim, const std::vector<data_kind_t> &kinds,
         const args_t &args, const attr_t &attr, bool inplace, res_t *res);
 
+template <typename prb_t>
+int init_prim_ref_common(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
+        const prb_t *prb_cpu, res_t *res) {
+
+    init_pd_args_t<prb_t> init_pd_args(
+            /* res = */ nullptr, get_cpu_engine(), prb_cpu, prb_cpu->dir,
+            /* hint = */ nullptr, /* src_md = */ nullptr);
+    init_pd(init_pd_args);
+
+    benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pdw;
+    // `is_service_prim=true` prevents from filtering the implementation
+    // by name which is intended through a `get_prim_ref_impl_filter()`.
+    // As `fetch_impl` doesn't have any further logic related to it, it's
+    // safe to set it to `false`.
+    fetch_impl(pdw, init_pd_args, get_prim_ref_impl_filter(),
+            /* res = */ nullptr,
+            /* is_service_prim = */ false);
+
+    // Prim desc wasn't created - try the next set...
+    if (!pdw) return FAIL;
+
+    dnnl_primitive_t prim_ref_ptr {};
+    auto st = dnnl_primitive_create(&prim_ref_ptr, pdw);
+    // Primitive wasn't created - try the next set...
+    if (st != dnnl_success) return FAIL;
+
+    BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
+            query_impl_info(pdw).c_str());
+    res->prim_ref_repro = prb_cpu->str();
+    prim_ref.reset(prim_ref_ptr);
+    return OK;
+}
+
 #endif
diff --git a/tests/benchdnn/ip/ip.cpp b/tests/benchdnn/ip/ip.cpp
index 7afe6221b99..f8428e3c2e9 100644
--- a/tests/benchdnn/ip/ip.cpp
+++ b/tests/benchdnn/ip/ip.cpp
@@ -113,7 +113,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
         prim_ref_dt.erase(prim_ref_dt.begin());
         prim_ref_bia_dt.erase(prim_ref_bia_dt.begin());
     }
-    dnnl_primitive_t prim_ref_ {};
 
     for_(const auto &prim_ref_dt_i : prim_ref_dt)
     for (const auto &prim_ref_bia_dt_i : prim_ref_bia_dt) {
@@ -121,35 +120,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
                 tag::any, tag::any, tag::any, prb->mb, cpu_attr, prb->ctx_init,
                 prb->ctx_exe, prb->impl_filter};
 
-        init_pd_args_t<prb_t> init_pd_args(
-                /* res = */ nullptr, get_cpu_engine(), &prb_cpu, prb->dir,
-                /* hint = */ nullptr, /* src_md = */ nullptr);
-        init_pd(init_pd_args);
-
-        benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pdw;
-        // `is_service_prim=true` prevents from filtering the implementation
-        // by name which is intended through a `get_prim_ref_impl_filter()`.
-        // As `fetch_impl` doesn't have any further logic related to it, it's
-        // safe to set it to `false`.
-        fetch_impl(pdw, init_pd_args, get_prim_ref_impl_filter(),
-                /* res = */ nullptr,
-                /* is_service_prim = */ false);
-
-        // Prim desc wasn't created - try the next set...
-        if (!pdw) continue;
-
-        auto st = dnnl_primitive_create(&prim_ref_, pdw);
-        // Primitive wasn't created - try the next set...
-        if (st != dnnl_success) continue;
-
-        BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
-                query_impl_info(pdw).c_str());
-        res->prim_ref_repro = prb_cpu.str();
-        prim_ref.reset(prim_ref_);
-        return OK;
+        auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
+        if (st == OK) return OK;
     }
 
-    prim_ref.reset(prim_ref_);
+    prim_ref.reset(nullptr);
     return OK;
 }
 
diff --git a/tests/benchdnn/matmul/matmul.cpp b/tests/benchdnn/matmul/matmul.cpp
index 682908bab3e..ffa6a6a46b6 100644
--- a/tests/benchdnn/matmul/matmul.cpp
+++ b/tests/benchdnn/matmul/matmul.cpp
@@ -202,7 +202,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
         prim_ref_dt.erase(prim_ref_dt.begin());
         prim_ref_bia_dt.erase(prim_ref_bia_dt.begin());
     }
-    dnnl_primitive_t prim_ref_ {};
 
     for_(const auto &prim_ref_dt_i : prim_ref_dt)
     for (const auto &prim_ref_bia_dt_i : prim_ref_bia_dt) {
@@ -214,35 +213,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
 #endif
                 cpu_attr, prb->ctx_init, prb->ctx_exe, prb->impl_filter};
 
-        init_pd_args_t<prb_t> init_pd_args(
-                /* res = */ nullptr, get_cpu_engine(), &prb_cpu, prb->dir,
-                /* hint = */ nullptr, /* src_md = */ nullptr);
-        init_pd(init_pd_args);
-
-        benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pdw;
-        // `is_service_prim=true` prevents from filtering the implementation
-        // by name which is intended through a `get_prim_ref_impl_filter()`.
-        // As `fetch_impl` doesn't have any further logic related to it, it's
-        // safe to set it to `false`.
-        fetch_impl(pdw, init_pd_args, get_prim_ref_impl_filter(),
-                /* res = */ nullptr,
-                /* is_service_prim = */ false);
-
-        // Prim desc wasn't created - try the next set...
-        if (!pdw) continue;
-
-        auto st = dnnl_primitive_create(&prim_ref_, pdw);
-        // Primitive wasn't created - try the next set...
-        if (st != dnnl_success) continue;
-
-        BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
-                query_impl_info(pdw).c_str());
-        res->prim_ref_repro = prb_cpu.str();
-        prim_ref.reset(prim_ref_);
-        return OK;
+        auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
+        if (st == OK) return OK;
     }
 
-    prim_ref.reset(prim_ref_);
+    prim_ref.reset(nullptr);
     return OK;
 }
 

From 19800aaa2eca99a9a2dc4300b538c98e2ebb48fc Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Wed, 12 Feb 2025 13:47:59 -0800
Subject: [PATCH 16/18] benchdnn: mem_check: estimate prim_ref memory properly

---
 tests/benchdnn/conv/conv.cpp     | 15 ++++++++---
 tests/benchdnn/deconv/deconv.cpp | 15 ++++++++---
 tests/benchdnn/dnnl_common.cpp   | 44 +++++++++++++++++++++++++++-----
 tests/benchdnn/dnnl_common.hpp   |  2 +-
 tests/benchdnn/ip/ip.cpp         | 15 ++++++++---
 tests/benchdnn/matmul/matmul.cpp | 15 ++++++++---
 6 files changed, 86 insertions(+), 20 deletions(-)

diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp
index 49b5e19fbf5..6690be4b377 100644
--- a/tests/benchdnn/conv/conv.cpp
+++ b/tests/benchdnn/conv/conv.cpp
@@ -555,9 +555,18 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     if (has_bench_mode_bit(mode_bit_t::exec)) {
-        SAFE(check_total_size(res), WARN);
-        // Don't check total size for CPU prim as the reference - it needs a
-        // special handling to combine both primitive memory requirements.
+        const auto &prim_ref = v_prim[1];
+        if (prim_ref) {
+            // Copy res to avoid save/restore state and reason.
+            res_t res_copy = *res;
+            SAFE(check_total_size(&res_copy, prim_ref), WARN);
+            if (res_copy.state == SKIPPED) {
+                v_prim[1].reset(nullptr);
+                SAFE(check_total_size(res), WARN);
+            }
+        } else {
+            SAFE(check_total_size(res), WARN);
+        }
     }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp
index 25e43009cf0..9ad15f0cf9d 100644
--- a/tests/benchdnn/deconv/deconv.cpp
+++ b/tests/benchdnn/deconv/deconv.cpp
@@ -532,9 +532,18 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     if (has_bench_mode_bit(mode_bit_t::exec)) {
-        SAFE(check_total_size(res), WARN);
-        // Don't check total size for CPU prim as the reference - it needs a
-        // special handling to combine both primitive memory requirements.
+        const auto &prim_ref = v_prim[1];
+        if (prim_ref) {
+            // Copy res to avoid save/restore state and reason.
+            res_t res_copy = *res;
+            SAFE(check_total_size(&res_copy, prim_ref), WARN);
+            if (res_copy.state == SKIPPED) {
+                v_prim[1].reset(nullptr);
+                SAFE(check_total_size(res), WARN);
+            }
+        } else {
+            SAFE(check_total_size(res), WARN);
+        }
     }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp
index 6ab7e70792d..97294eb003d 100644
--- a/tests/benchdnn/dnnl_common.cpp
+++ b/tests/benchdnn/dnnl_common.cpp
@@ -17,6 +17,7 @@
 #include <algorithm> // for std::reverse and std::copy
 #include <functional> // for std::bind and std::placeholders
 #include <list>
+#include <numeric>
 #include <string> // for std::string
 #include <utility> // for std::pair
 #include <vector> // for std::vector
@@ -1105,7 +1106,17 @@ std::string smart_bytes(double bytes) {
     return s;
 }
 
-int check_total_size(res_t *res) {
+// The function logic is the following:
+// The `checkit` function verifies that the bare minimum memory requirements
+// (the library and the stock reference) comply with the limits.
+// If not, the case is dropped since it can't be run.
+// If yes, the second call to this function with `prim_ref` specified will
+// check memory requirements for `prim_ref`, update the corresponding memory
+// parts, and verify that the updated numbers comply with the limits.
+//   * If yes, good to go with a `prim_ref`.
+//   * If not, indicate that the system won't make it and drop `prim_ref`,
+//     falling back to the stock reference.
+int check_total_size(res_t *res, dnnl_primitive_t prim_ref) {
     static size_t cpu_device_capacity = get_cpu_ram_size();
     static size_t gpu_device_capacity = 0;
     static size_t gpu_max_alloc_capacity = 0;
@@ -1166,7 +1177,22 @@ int check_total_size(res_t *res) {
                 smart_bytes(gpu_max_alloc_capacity).c_str());
     }
 
-    size_t total_size_cpu = check_mem_size_args.total_size_ref
+    // Note: in theory, `total_size_ref` can be smaller for a `prim_ref` because
+    // the stock reference uses f32 for estimation, while the best `prim_ref`
+    // tries the requested data types first, which can be lower-precision data
+    // types that require less memory.
+    size_t total_size_ref = check_mem_size_args.total_size_ref;
+    if (prim_ref) {
+        // Collect memory sizes of prim_ref.
+        check_mem_size_args_t prim_ref_mem_size_args;
+        collect_mem_size(prim_ref_mem_size_args, query_pd(prim_ref), DIR_UNDEF,
+                /* need_skip = */ false);
+        // Update reference size number.
+        total_size_ref = std::accumulate(prim_ref_mem_size_args.sizes.begin(),
+                prim_ref_mem_size_args.sizes.end(), 0ULL);
+    }
+
+    size_t total_size_cpu = total_size_ref
             + check_mem_size_args.total_size_compare
             + check_mem_size_args.total_size_mapped;
     // If the problem runs on CPU, the combined memory represents requirements
@@ -1183,9 +1209,16 @@ int check_total_size(res_t *res) {
     // for integrated devices and mapping/unmapping memory.
 
     if (!fits_cpu_ram) {
+        std::string prim_ref_msg
+                = prim_ref ? " with CPU primitive reference" : "";
         BENCHDNN_PRINT(1,
-                "[CHECK_MEM][%s]: Not enough CPU RAM for a problem.\n",
-                dir_c_str());
+                "[CHECK_MEM][%s]: Not enough CPU RAM for a problem%s.\n",
+                dir_c_str(), prim_ref_msg.c_str());
+        res->state = SKIPPED;
+        res->reason = skip_reason::not_enough_ram;
+    }
+
+    if (!fits_cpu_ram) {
         // Try to catch a huge scratchpad size requested by the library.
         // Use following logic:
         //     scratch_size
@@ -1204,10 +1237,7 @@ int check_total_size(res_t *res) {
                     (size_t)(scratch_trh
                             * check_mem_size_args.total_size_device));
             res->state = FAILED;
-        } else {
-            res->state = SKIPPED;
         }
-        res->reason = skip_reason::not_enough_ram;
     }
 
     BENCHDNN_PRINT((!fits_cpu_ram ? 1 : 6),
diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index e974fb4b50e..de181779a39 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -236,7 +236,7 @@ int get_cpu_cache_size(cpu_cache_args_t &cache_args);
 int get_gpu_cache_size(size_t &cache_size);
 
 std::string smart_bytes(double bytes);
-int check_total_size(res_t *res);
+int check_total_size(res_t *res, dnnl_primitive_t prim_ref = nullptr);
 bool is_fwd_training(dnnl_prop_kind_t prop_kind);
 bool is_fwd_prop_kind(dnnl_prop_kind_t prop_kind);
 int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);
diff --git a/tests/benchdnn/ip/ip.cpp b/tests/benchdnn/ip/ip.cpp
index f8428e3c2e9..3ab654c8d4f 100644
--- a/tests/benchdnn/ip/ip.cpp
+++ b/tests/benchdnn/ip/ip.cpp
@@ -386,9 +386,18 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     if (has_bench_mode_bit(mode_bit_t::exec)) {
-        SAFE(check_total_size(res), WARN);
-        // Don't check total size for CPU prim as the reference - it needs a
-        // special handling to combine both primitive memory requirements.
+        const auto &prim_ref = v_prim[1];
+        if (prim_ref) {
+            // Copy res to avoid save/restore state and reason.
+            res_t res_copy = *res;
+            SAFE(check_total_size(&res_copy, prim_ref), WARN);
+            if (res_copy.state == SKIPPED) {
+                v_prim[1].reset(nullptr);
+                SAFE(check_total_size(res), WARN);
+            }
+        } else {
+            SAFE(check_total_size(res), WARN);
+        }
     }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);
diff --git a/tests/benchdnn/matmul/matmul.cpp b/tests/benchdnn/matmul/matmul.cpp
index ffa6a6a46b6..ca709152f3d 100644
--- a/tests/benchdnn/matmul/matmul.cpp
+++ b/tests/benchdnn/matmul/matmul.cpp
@@ -860,9 +860,18 @@ int createit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
 int checkit(std::vector<benchdnn_dnnl_wrapper_t<dnnl_primitive_t>> &v_prim,
         const prb_t *prb, res_t *res) {
     if (has_bench_mode_bit(mode_bit_t::exec)) {
-        SAFE(check_total_size(res), WARN);
-        // Don't check total size for CPU prim as the reference - it needs a
-        // special handling to combine both primitive memory requirements.
+        const auto &prim_ref = v_prim[1];
+        if (prim_ref) {
+            // Use a copy of `res` to avoid saving/restoring state and reason.
+            res_t res_copy = *res;
+            SAFE(check_total_size(&res_copy, prim_ref), WARN);
+            if (res_copy.state == SKIPPED) {
+                v_prim[1].reset(nullptr);
+                SAFE(check_total_size(res), WARN);
+            }
+        } else {
+            SAFE(check_total_size(res), WARN);
+        }
     }
     if (has_bench_mode_bit(mode_bit_t::corr)) {
         SAFE(check_caches(v_prim[0], prb, res), WARN);

From 63abc0fc454520382a65ac8ff44b5b26b8752680 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Fri, 14 Feb 2025 21:27:13 -0800
Subject: [PATCH 17/18] benchdnn: prim_ref impl filter ignores values from
 global-(skip-)impl

---
 tests/benchdnn/self/common.cpp       | 11 ++++++----
 tests/benchdnn/utils/impl_filter.cpp | 32 +++++++++++++++-------------
 tests/benchdnn/utils/impl_filter.hpp | 21 +++++++++++++-----
 tests/benchdnn/utils/parser.cpp      |  2 +-
 4 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/tests/benchdnn/self/common.cpp b/tests/benchdnn/self/common.cpp
index aceacc11fc8..eb7080f0426 100644
--- a/tests/benchdnn/self/common.cpp
+++ b/tests/benchdnn/self/common.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -518,13 +518,16 @@ static int check_trim_tags() {
 }
 
 static int check_skip_impl() {
-    impl_filter_t impl_filter({"gemm"}, /* use_impl = */ false);
+    impl_filter_t impl_filter({"gemm"}, /* use_impl = */ false,
+            /* respect_global_filter = */ true);
     SELF_CHECK_EQ(true, need_next_impl("x64:gemm:jit", impl_filter));
 
-    impl_filter = impl_filter_t({"ref", "x64:gemm"}, /* use_impl = */ false);
+    impl_filter = impl_filter_t({"ref", "x64:gemm"}, /* use_impl = */ false,
+            /* respect_global_filter = */ true);
     SELF_CHECK_EQ(true, need_next_impl("x64:gemm:jit", impl_filter));
 
-    impl_filter = impl_filter_t({"this_finds_nothing"}, /* use_impl = */ false);
+    impl_filter = impl_filter_t({"this_finds_nothing"}, /* use_impl = */ false,
+            /* respect_global_filter = */ true);
     SELF_CHECK_EQ(false, need_next_impl("x64:gemm:jit", impl_filter));
 
     return OK;
diff --git a/tests/benchdnn/utils/impl_filter.cpp b/tests/benchdnn/utils/impl_filter.cpp
index 36f52fd7619..8de964d4013 100644
--- a/tests/benchdnn/utils/impl_filter.cpp
+++ b/tests/benchdnn/utils/impl_filter.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -21,8 +21,9 @@
 impl_filter_t global_impl_filter {};
 
 const impl_filter_t &get_prim_ref_impl_filter() {
-    static const impl_filter_t prim_ref_impl_filter(
-            {"ref:any", "ref_int8:any"}, /* use_impl = */ false);
+    static const impl_filter_t prim_ref_impl_filter({"ref:any", "ref_int8:any"},
+            /* use_impl = */ false,
+            /* respect_global_filter = */ false);
     return prim_ref_impl_filter;
 }
 
@@ -37,17 +38,18 @@ std::string get_impl_filter_name(const impl_filter_t &impl_filter) {
 // This operator takes the global filter into account as well. No need to dump
 // it additionally in a common spot.
 std::ostream &operator<<(std::ostream &s, const impl_filter_t &impl_filter) {
-    const bool is_def = global_impl_filter.is_def() && impl_filter.is_def();
+    const bool is_global_def = IMPLICATION(
+            impl_filter.respect_global_filter(), global_impl_filter.is_def());
+    const bool is_def = is_global_def && impl_filter.is_def();
     if (is_def) return s;
 
-    const auto &option_name = !global_impl_filter.is_def()
+    const auto &option_name = !is_global_def
             ? get_impl_filter_name(global_impl_filter)
             : get_impl_filter_name(impl_filter);
     s << option_name << "=";
 
-    const auto &names = !global_impl_filter.is_def()
-            ? global_impl_filter.get_names()
-            : impl_filter.get_names();
+    const auto &names = !is_global_def ? global_impl_filter.get_names()
+                                       : impl_filter.get_names();
     const size_t sz = names.size();
     for (size_t i = 0; i < sz - 1; i++) {
         s << names[i] << ",";
@@ -59,15 +61,15 @@ std::ostream &operator<<(std::ostream &s, const impl_filter_t &impl_filter) {
 
 bool need_next_impl(
         const std::string &impl_name, const impl_filter_t &impl_filter) {
-    const bool is_def = global_impl_filter.is_def() && impl_filter.is_def();
+    const bool is_global_def = IMPLICATION(
+            impl_filter.respect_global_filter(), global_impl_filter.is_def());
+    const bool is_def = is_global_def && impl_filter.is_def();
     if (is_def) return false;
 
-    const bool use_impl = !global_impl_filter.is_def()
-            ? global_impl_filter.use_impl()
-            : impl_filter.use_impl();
-    const auto &names = !global_impl_filter.is_def()
-            ? global_impl_filter.get_names()
-            : impl_filter.get_names();
+    const bool use_impl = !is_global_def ? global_impl_filter.use_impl()
+                                         : impl_filter.use_impl();
+    const auto &names = !is_global_def ? global_impl_filter.get_names()
+                                       : impl_filter.get_names();
 
     // If the name hits the list and `use_impl_=true`, no need the next impl.
     // If the name hits the list and `use_impl_=false`, needs the next impl.
diff --git a/tests/benchdnn/utils/impl_filter.hpp b/tests/benchdnn/utils/impl_filter.hpp
index 802604f46d4..e56c03b40f1 100644
--- a/tests/benchdnn/utils/impl_filter.hpp
+++ b/tests/benchdnn/utils/impl_filter.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -21,18 +21,29 @@
 #include <vector>
 
 struct impl_filter_t {
-    impl_filter_t() {};
-    impl_filter_t(const std::vector<std::string> &impl_names, bool use_impl)
-        : impl_names_(impl_names), use_impl_(use_impl) {}
+    impl_filter_t() = default;
+    impl_filter_t(const std::vector<std::string> &impl_names, bool use_impl,
+            bool respect_global_filter)
+        : impl_names_(impl_names)
+        , use_impl_(use_impl)
+        , respect_global_filter_(respect_global_filter) {}
 
     bool is_def() const { return impl_names_.empty(); }
 
     const std::vector<std::string> &get_names() const { return impl_names_; }
     const bool use_impl() const { return use_impl_; }
+    const bool respect_global_filter() const { return respect_global_filter_; }
 
 private:
     std::vector<std::string> impl_names_;
-    bool use_impl_; // `true` to `--impl`, `false` to `--skip-impl`.
+    bool use_impl_ = false; // `true` to `--impl`, `false` to `--skip-impl`.
+    // Test objects should respect the global filter. CPU prim_ref objects
+    // shouldn't, as it affects correctness validation speed.
+    // The default is `true` for the case when the global filter is set: the
+    // local filter is then left uninitialized but is still passed to
+    // `fetch_impl`, so it must indicate that the global filter is respected
+    // for the global values to be picked up.
+    bool respect_global_filter_ = true;
 };
 
 extern impl_filter_t global_impl_filter;
diff --git a/tests/benchdnn/utils/parser.cpp b/tests/benchdnn/utils/parser.cpp
index c9dfbe53aec..df87265ad3c 100644
--- a/tests/benchdnn/utils/parser.cpp
+++ b/tests/benchdnn/utils/parser.cpp
@@ -405,7 +405,7 @@ bool parse_impl_filter(impl_filter_t &impl_filter,
             }
         }
 
-        return impl_filter_t(v, use_impl);
+        return impl_filter_t(v, use_impl, /* respect_global_filter = */ true);
     };
     return parse_single_value_option(impl_filter, def_impl_filter,
             str2impl_filter, str, option_name, help);

From da3d7d3ed45dd513ab5e590157fa13d19212312e Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin <dmitry.zarukin@intel.com>
Date: Mon, 10 Feb 2025 21:29:26 -0800
Subject: [PATCH 18/18] benchdnn: add ref run report status

---
 tests/benchdnn/dnnl_common.hpp         | 35 +++++++++++++++++++-------
 tests/benchdnn/utils/task.hpp          |  7 ++++--
 tests/benchdnn/utils/task_executor.hpp |  6 ++---
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/tests/benchdnn/dnnl_common.hpp b/tests/benchdnn/dnnl_common.hpp
index de181779a39..df8f2bc5c2e 100644
--- a/tests/benchdnn/dnnl_common.hpp
+++ b/tests/benchdnn/dnnl_common.hpp
@@ -617,6 +617,12 @@ void check_correctness(const prb_t *prb, const std::vector<data_kind_t> &kinds,
         TIME_COMPARE(check_buffer_overwrite(args.dnn_mem(i), args.arg(i), res));
     }
 
+    // Report prim_ref run status to make it easier to distinguish GPU failures
+    // from ref CPU failures.
+    if (prim_ref) {
+        BENCHDNN_PRINT(1, "run ref: %s\n", res->prim_ref_repro.c_str());
+    }
+
     TIME_REF(compute_ref(prb, ref_args, prim_ref));
 
     for (const auto &kind : kinds) {
@@ -644,15 +650,6 @@ void check_correctness(const prb_t *prb, const std::vector<data_kind_t> &kinds,
                 cpu_cache_args.L2_size, cpu_cache_args.L3_size,
                 benchdnn_get_max_threads(),
                 query_impl_info(query_pd(prim_ref)).c_str());
-
-        // Replace engine kind for repro line from GPU to CPU.
-        const auto eng_pos = res->prim_ref_repro.find("engine=gpu");
-        if (eng_pos != std::string::npos)
-            // Replace `g` in `gpu` with `c`
-            res->prim_ref_repro[eng_pos + 7] = 'c';
-
-        BENCHDNN_PRINT(
-                0, "[PRIM_REF][REPRO]: %s\n", res->prim_ref_repro.c_str());
     }
 }
 
@@ -1086,7 +1083,27 @@ int init_prim_ref_common(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
 
     BENCHDNN_PRINT(5, "CPU reference oneDNN implementation: %s\n",
             query_impl_info(pdw).c_str());
+
     res->prim_ref_repro = prb_cpu->str();
+    // Replace engine kind for repro line from GPU to CPU.
+    const auto eng_pos = res->prim_ref_repro.find("engine=gpu");
+    if (eng_pos != std::string::npos) {
+        // Replace `g` in `gpu` with `c`
+        res->prim_ref_repro[eng_pos + 7] = 'c';
+    }
+
+    // Remove `--impl=XXX` as it doesn't affect prim_ref.
+    const auto impl_pos = res->prim_ref_repro.find("--impl=");
+    if (impl_pos != std::string::npos) {
+        // Search for the next space starting from `impl_pos` as the length of
+        // the names is variable.
+        const auto end_impl_pos
+                = res->prim_ref_repro.find_first_of(" ", impl_pos);
+        assert(end_impl_pos != std::string::npos);
+        // `+ 1` is for extra space.
+        res->prim_ref_repro.erase(impl_pos, end_impl_pos - impl_pos + 1);
+    }
+
     prim_ref.reset(prim_ref_ptr);
     return OK;
 }
diff --git a/tests/benchdnn/utils/task.hpp b/tests/benchdnn/utils/task.hpp
index 7c1a8654e29..8b18624b925 100644
--- a/tests/benchdnn/utils/task.hpp
+++ b/tests/benchdnn/utils/task.hpp
@@ -38,8 +38,11 @@ struct task_t {
         , perf_template_(perf_template)
         , idx_(idx) {}
 
-    int create() {
-        BENCHDNN_PRINT(1, "create: %s\n", prb_.str());
+    int create(bool in_parallel) {
+        // Report creation status for problems only in sequential mode as in
+        // parallel it's still not clear which one failed.
+        if (!in_parallel) BENCHDNN_PRINT(1, "create: %s\n", prb_.str());
+
         if (skip_start(&res_, idx_)) return OK;
         if (bench_mode == bench_mode_t::list) return res_.state = LISTED, OK;
 
diff --git a/tests/benchdnn/utils/task_executor.hpp b/tests/benchdnn/utils/task_executor.hpp
index ddd19973ac8..7b752615321 100644
--- a/tests/benchdnn/utils/task_executor.hpp
+++ b/tests/benchdnn/utils/task_executor.hpp
@@ -60,10 +60,10 @@ struct task_executor_t {
         // Special case is needed for THREADPOOL RUNTIME. Both `Parallel_nd` and
         // `createit` calls activate threadpool which causes undesired behavior.
         if (tasks_.size() == 1)
-            tasks_[0].create();
+            tasks_[0].create(/* in_parallel = */ false);
         else
-            benchdnn_parallel_nd(
-                    tasks_.size(), [&](int i) { tasks_[i].create(); });
+            benchdnn_parallel_nd(tasks_.size(),
+                    [&](int i) { tasks_[i].create(/* in_parallel = */ true); });
 
         // Check caches first to avoid filling cache with service reorders.
         for (auto &t : tasks_) {