ROCm · bghimireamd · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
@@ -128,6 +128,7 @@ set(INTEGRATION_TESTS_EXE hipdnn_integration_tests)
 
 add_executable(${INTEGRATION_TESTS_EXE}
     src/main.cpp
+    src/harness/bundle/IntegrationBundleVerificationHarness.cpp
 )
 
 add_subdirectory(src/integration_tests)

@@ -3,9 +3,13 @@
 
 #pragma once
 
+#include <stdexcept>
+#include <string>
+
 #include <hipdnn_test_sdk/utilities/cpu_graph_executor/CpuReferenceGraphExecutor.hpp>
 
 #include "IReferenceGraphExecutor.hpp"
+#include "ReferenceCapabilityError.hpp"
 
 namespace hipdnn_integration_tests
 {
@@ -17,7 +21,25 @@ class CpuReferenceGraphExecutorAdapter : public IReferenceGraphExecutor
                  size_t size,
                  const std::unordered_map<int64_t, void*>& variantPack) override
     {
-        _executor.execute(graphBuffer, size, variantPack);
+        // The shared test_sdk CPU executor throws a plain std::runtime_error for
+        // BOTH "no plan for this op" (capability miss, case A) and a genuine
+        // runtime failure (case C) — it does not distinguish them by type. We
+        // cannot tell them apart here, so we conservatively translate every throw
+        // into a ReferenceCapabilityError (case A), carrying the original message
+        // so a real failure still surfaces in the unverifiable report. Net effect:
+        // a CPU-ref crash routes as "couldn't run" rather than a hard FAIL. The
+        // GPU executor (our code) keeps full A-vs-C fidelity by throwing the right
+        // type at the source.
+        try
+        {
+            _executor.execute(graphBuffer, size, variantPack);
+        }
+        catch(const std::exception& e)
+        {
+            throw ReferenceCapabilityError(std::string("CPU reference executor could not run "
+                                                       "this graph: ")
+                                           + e.what());
+        }
     }
 
     bool requiresDeviceMemory() const override

@@ -17,6 +17,8 @@
 #include <hipdnn_test_sdk/utilities/TestUtilities.hpp>
 #include <hipdnn_test_sdk/utilities/cpu_graph_executor/CpuReferenceGraphExecutor.hpp>
 
+#include "harness/TomlGuards.hpp"
+
 namespace hipdnn_integration_tests
 {
 
@@ -47,12 +49,16 @@ class TestGoldenReferenceCpu : public ::testing::TestWithParam<std::filesystem::
 
         _graphAndTensors = hipdnn_test_sdk::utilities::loadGraphAndTensors(path);
         _referenceOutputTensors = _graphAndTensors.extractAndClearOutputTensorData();
+
+        skipIfTomlMatched(currentTestName());
     }
 
     void goldenReferenceTestSuite(float absoluteTolerance, float relativeTolerance)
     {
         SKIP_IF_WINDOWS();
 
+        applyTomlToleranceOverride(currentTestName(), absoluteTolerance, relativeTolerance);
+
         auto tensorMap = _graphAndTensors.hostBufferMap();
 
         hipdnn_test_sdk::utilities::CpuReferenceGraphExecutor().execute(

@@ -11,8 +11,6 @@
 #include <hipdnn_frontend/Graph.hpp>
 #include <hipdnn_frontend/Utilities.hpp>
 #include <hipdnn_frontend/attributes/TensorAttributes.hpp>
-#include <hipdnn_frontend/node/RMSNormNode.hpp>
-#include <hipdnn_frontend/node/ReductionNode.hpp>
 #include <hipdnn_plugin_sdk/PluginLogging.hpp>
 #include <hipdnn_test_sdk/utilities/CpuFpReferenceMiopenRmsValidation.hpp>
 #include <hipdnn_test_sdk/utilities/CpuFpReferenceValidation.hpp>
@@ -29,6 +27,9 @@
 #include "harness/SharedHandle.hpp"
 #include "harness/SupportMatrixCollector.hpp"
 #include "harness/TestConfig.hpp"
+#include "harness/TomlGuards.hpp"
+#include "harness/input_init/SynthesizeInputs.hpp"
+#include "harness/tolerance/ToleranceResolver.hpp"
 
 namespace hipdnn_integration_tests
 {
@@ -41,7 +42,6 @@ template <typename DataType, typename TestCaseType>
 class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<TestCaseType>
 {
 protected:
-    int _deviceId = 0;
     std::string _testCaseNote;
     std::string _testCaseLayout;
     std::unordered_map<int64_t, std::string> _tensorIdToNameMap;
@@ -53,20 +53,10 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<Test
     {
         SKIP_IF_NO_DEVICES();
 
-        // Initialize HIP
-        ASSERT_EQ(hipInit(0), hipSuccess);
-        ASSERT_EQ(hipGetDevice(&_deviceId), hipSuccess);
-
-        // Check for any engine specific test skips
-        if(auto* info = ::testing::UnitTest::GetInstance()->current_test_info(); info != nullptr)
-        {
-            const std::string testName = std::string(info->test_suite_name()) + "." + info->name();
-            if(auto skipReason = TestConfig::get().findSkipForTest(testName))
-            {
-                GTEST_SKIP() << "[arch " << TestConfig::get().getCurrentArch() << "] "
-                             << *skipReason;
-            }
-        }
+        // HIP initializes lazily on first runtime use; the shared hipdnn handle
+        // (getSharedHandle -> hipdnnCreate) does this before any graph executes,
+        // so no explicit hipInit is needed here.
+        skipIfTomlMatched(currentTestName());
     }
 
     void setTestCaseNote(std::string note)
@@ -81,41 +71,64 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<Test
 
     virtual void runGraphTest() = 0;
 
-    // Determine tolerance for an output tensor based on the graph and
-    // configured tolerance mode for the engine.
+    // Determine the FINAL tolerance for an output tensor: an aggregation-policy
+    // default plus the TOML per-test override, both via
+    // harness/tolerance/ToleranceResolver.hpp. The resolver is keyed on the
+    // serialized flatbuffer graph: we serialize with to_binary() — the same
+    // pattern initializeBundle() already uses — and read the output tensor's dtype
+    // from the flatbuffer.
+    //
+    // Policy = outputOpTolerance (the last non-Pointwise op), which reproduces
+    // this harness's historical getTolerance() behavior so the C++ graph tests
+    // keep their exact tolerances. (The bundle harness uses the maxAcrossNodes
+    // default; the two agree for the common one-real-op + activation case.) The
+    // returned value is already overridden, so registerValidator stores it as-is.
     float getTolerance(const hipdnn_frontend::graph::Graph& graph,
                        const std::shared_ptr<hipdnn_frontend::graph::TensorAttributes>& output)
     {
         ToleranceMode mode = TestConfig::get().getToleranceMode();
+        if(mode != ToleranceMode::DEFAULT)
+        {
+            ADD_FAILURE() << "getTolerance: unhandled tolerance mode";
+            return 0.0f;
+        }
 
-        if(mode == ToleranceMode::DEFAULT)
+        auto [serialized, serErr] = graph.to_binary();
+        if(serErr.code != hipdnn_frontend::ErrorCode::OK || serialized.empty())
         {
-            // We determine the tolerance based on the last non-PointwiseNode
-            // (the root op). This will be gradually updated to use dynamic
-            // calculation as possible; eventually, the tolerance will be
-            // entirely dynamically determined in the default case.
-            //
-            // NOTE: after validate(), the graph's sub-nodes are in topological order.
-            const hipdnn_frontend::graph::INode* rootOp = nullptr;
-            graph.visit([&](const hipdnn_frontend::graph::INode& node) {
-                if(dynamic_cast<const hipdnn_frontend::graph::PointwiseNode*>(&node) == nullptr
-                   && dynamic_cast<const hipdnn_frontend::graph::Graph*>(&node) == nullptr)
-                {
-                    rootOp = &node;
-                }
-            });
+            ADD_FAILURE() << "getTolerance: graph serialization failed";
+            return 0.0f;
+        }
 
-            if(rootOp == nullptr)
-            {
-                ADD_FAILURE() << "getTolerance: no root op found in graph";
-                return 0.0f;
-            }
+        const auto wrapper
+            = hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper::fromSerializedBlob(
+                serialized.data(), serialized.size());
+        if(!wrapper.isValid())
+        {
+            ADD_FAILURE() << "getTolerance: serialized graph failed verification";
+            return 0.0f;
+        }
 
-            return toleranceForNode(*rootOp, output->get_data_type());
+        const auto& tensorMap = wrapper.getTensorMap();
+        const auto it = tensorMap.find(output->get_uid());
+        if(it == tensorMap.end())
+        {
+            ADD_FAILURE() << "getTolerance: output tensor uid " << output->get_uid()
+                          << " not found in serialized graph";
+            return 0.0f;
         }
 
-        ADD_FAILURE() << "getTolerance: unhandled tolerance mode";
-        return 0.0f;
+        float atol = 0.0f;
+        float rtol = 0.0f;
+        tolerance::resolveTolerance(wrapper,
+                                    it->second->data_type(),
+                                    currentTestName(),
+                                    atol,
+                                    rtol,
+                                    tolerance::outputOpTolerance);
+        // getTolerance's single-float contract predates split atol/rtol; under the
+        // current resolver the two are equal (same default, same override).
+        return atol;
     }
 
     void verifyGraph(hipdnn_frontend::graph::Graph& graph, unsigned int seed)
@@ -201,6 +214,9 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<Test
             << "At least one output tensor id must be specified for "
                "validation.";
 
+        tolerance::warnIfMultipleOutputs(gpuBundle.outputTensorIds.size(),
+                                         "IntegrationGraphVerificationHarness");
+
         HIPDNN_PLUGIN_LOG_INFO("Validating " << gpuBundle.outputTensorIds.size()
                                              << " output tensors");
 
@@ -253,25 +269,10 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<Test
                            float absoluteTolerance,
                            float relativeTolerance)
     {
-        // Check for per-test tolerance override from TOML config
-        float finalAtol = absoluteTolerance;
-        float finalRtol = relativeTolerance;
-
-        auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info();
-        if(testInfo != nullptr)
-        {
-            std::string testName
-                = std::string(testInfo->test_suite_name()) + "." + testInfo->name();
-            auto override = TestConfig::get().findToleranceOverride(testName);
-            if(override.has_value())
-            {
-                finalAtol = override->atol;
-                finalRtol = override->rtol;
-                HIPDNN_PLUGIN_LOG_INFO("Tolerance override applied for " << testName
-                                                                         << ": atol=" << finalAtol
-                                                                         << " rtol=" << finalRtol);
-            }
-        }
+        // Tolerances arrive already resolved (default + TOML override) from
+        // getTolerance via ToleranceResolver; no override is applied here.
+        const float finalAtol = absoluteTolerance;
+        const float finalRtol = relativeTolerance;
 
         // Since the graph can infer properties + Ids, we defer validator registration until right
         // before validation in verifyGraph
@@ -332,67 +333,82 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam<Test
         });
     }
 
-    virtual void initializeBundle([[maybe_unused]] const hipdnn_frontend::graph::Graph& graph,
+    virtual void initializeBundle(const hipdnn_frontend::graph::Graph& graph,
                                   hipdnn_test_sdk::utilities::GraphTensorBundle& bundle,
                                   unsigned int seed)
     {
         bundle.sentinelFillOutputTensors();
 
-        for(auto& tensorPair : bundle.tensors)
+        auto [serialized, serErr] = graph.to_binary();
+        if(serErr.code != hipdnn_frontend::ErrorCode::OK || serialized.empty())
         {
-            if(!bundle.isOutput(tensorPair.first))
+            initializeBundleFallback(bundle, seed);
+            return;
+        }
+
+        const auto* fb = hipdnn_flatbuffers_sdk::data_objects::GetGraph(serialized.data());
+        if(fb == nullptr || fb->nodes() == nullptr)
+        {
+            initializeBundleFallback(bundle, seed);
+            return;
+        }
+
+        std::vector<int64_t> leafInputUids;
+        InputTensorMap inputs;
+        for(auto& [uid, tensor] : bundle.tensors)
+        {
+            if(!bundle.isOutput(uid))
             {
-                bundle.randomizeTensor(tensorPair.first, -1.0f, 1.0f, seed);
+                leafInputUids.push_back(uid);
+                inputs[uid] = std::move(tensor);
             }
         }
-    }
 
-    static float toleranceForNode(const hipdnn_frontend::graph::INode& node,
-                                  hipdnn_frontend::DataType dataType)
-    {
-        switch(dataType)
+        std::mt19937 rng(seed);
+        SynthesisTracker tracker(leafInputUids, inputs);
+
+        bool synthesisOk = true;
+        for(const auto* node : *fb->nodes())
         {
-        case hipdnn_frontend::DataType::FLOAT:
-            return toleranceForNodeTyped<float>(node);
-        case hipdnn_frontend::DataType::HALF:
-            return toleranceForNodeTyped<half>(node);
-        case hipdnn_frontend::DataType::BFLOAT16:
-            return toleranceForNodeTyped<bfloat16>(node);
-        default:
-            ADD_FAILURE() << "toleranceForNode: unsupported data type";
-            return 0.0f;
+            if(node == nullptr)
+            {
+                continue;
+            }
+            auto result = synthesizeNodeInputs(*node, tracker, rng);
+            if(!result.filled)
+            {
+                synthesisOk = false;
+                break;
+            }
+        }
+
+        if(synthesisOk)
+        {
+            auto finalResult = tracker.finish("synthesis");
+            synthesisOk = finalResult.filled;
+        }
+
+        for(auto& [uid, tensor] : inputs)
+        {
+            bundle.tensors[uid] = std::move(tensor);
+        }
+
+        if(!synthesisOk)
+        {
+            initializeBundleFallback(bundle, seed);
         }
     }
 
-    template <typename T>
-    static float toleranceForNodeTyped(const hipdnn_frontend::graph::INode& node)
+    void initializeBundleFallback(hipdnn_test_sdk::utilities::GraphTensorBundle& bundle,
+                                  unsigned int seed)
     {
-        namespace fe = hipdnn_frontend::graph;
-        using namespace hipdnn_test_sdk::utilities;
-
-        if(dynamic_cast<const fe::ConvolutionFpropNode*>(&node) != nullptr)
-            return static_cast<float>(conv::getToleranceFwd<T>());
-        if(dynamic_cast<const fe::ConvolutionDgradNode*>(&node) != nullptr)
-            return static_cast<float>(conv::getToleranceBwd<T>());
-        if(dynamic_cast<const fe::ConvolutionWgradNode*>(&node) != nullptr)
-            return static_cast<float>(conv::getToleranceWrw<T>());
-        if(dynamic_cast<const fe::BatchnormInferenceNodeVarianceExt*>(&node) != nullptr)
-            return static_cast<float>(batchnorm::getToleranceInferenceWithVariance<T>());
-        if(dynamic_cast<const fe::BatchnormInferenceNode*>(&node) != nullptr)
-            return static_cast<float>(batchnorm::getToleranceInference<T>());
-        if(dynamic_cast<const fe::BatchnormNode*>(&node) != nullptr)
-            return static_cast<float>(batchnorm::getToleranceTraining<T>());
-        if(dynamic_cast<const fe::BatchnormBackwardNode*>(&node) != nullptr)
-            return static_cast<float>(batchnorm::getToleranceBackward<T>());
-        if(dynamic_cast<const fe::MatmulNode*>(&node) != nullptr)
-            return static_cast<float>(matmul::getTolerance<T>());
-        if(dynamic_cast<const fe::ReductionNode*>(&node) != nullptr)
-            return static_cast<float>(reduction::getTolerance<T>());
-        if(dynamic_cast<const fe::RMSNormNode*>(&node) != nullptr)
-            return static_cast<float>(rmsnorm::getTolerance<T>());
-
-        ADD_FAILURE() << "toleranceForNodeTyped: unsupported node type";
-        return 0.0f;
+        for(auto& [uid, tensor] : bundle.tensors)
+        {
+            if(!bundle.isOutput(uid))
+            {
+                bundle.randomizeTensor(uid, -1.0f, 1.0f, seed);
+            }
+        }
     }
 
     void executeGpuGraph(hipdnnHandle_t handle,