diff --git a/dnn-providers/integration-tests/CMakeLists.txt b/dnn-providers/integration-tests/CMakeLists.txt index c47dcbdd529f..6daf3bc367c6 100644 --- a/dnn-providers/integration-tests/CMakeLists.txt +++ b/dnn-providers/integration-tests/CMakeLists.txt @@ -128,6 +128,7 @@ set(INTEGRATION_TESTS_EXE hipdnn_integration_tests) add_executable(${INTEGRATION_TESTS_EXE} src/main.cpp + src/harness/bundle/IntegrationBundleVerificationHarness.cpp ) add_subdirectory(src/integration_tests) diff --git a/dnn-providers/integration-tests/src/harness/CpuReferenceGraphExecutorAdapter.hpp b/dnn-providers/integration-tests/src/harness/CpuReferenceGraphExecutorAdapter.hpp index ff80def310de..3c16ff9f04c0 100644 --- a/dnn-providers/integration-tests/src/harness/CpuReferenceGraphExecutorAdapter.hpp +++ b/dnn-providers/integration-tests/src/harness/CpuReferenceGraphExecutorAdapter.hpp @@ -3,9 +3,13 @@ #pragma once +#include +#include + #include #include "IReferenceGraphExecutor.hpp" +#include "ReferenceCapabilityError.hpp" namespace hipdnn_integration_tests { @@ -17,7 +21,25 @@ class CpuReferenceGraphExecutorAdapter : public IReferenceGraphExecutor size_t size, const std::unordered_map& variantPack) override { - _executor.execute(graphBuffer, size, variantPack); + // The shared test_sdk CPU executor throws a plain std::runtime_error for + // BOTH "no plan for this op" (capability miss, case A) and a genuine + // runtime failure (case C) — it does not distinguish them by type. We + // cannot tell them apart here, so we conservatively translate every throw + // into a ReferenceCapabilityError (case A), carrying the original message + // so a real failure still surfaces in the unverifiable report. Net effect: + // a CPU-ref crash routes as "couldn't run" rather than a hard FAIL. The + // GPU executor (our code) keeps full A-vs-C fidelity by throwing the right + // type at the source. + try + { + _executor.execute(graphBuffer, size, variantPack); + } + catch(const std::exception& e) + { + throw ReferenceCapabilityError(std::string("CPU reference executor could not run " + "this graph: ") + + e.what()); + } } bool requiresDeviceMemory() const override diff --git a/dnn-providers/integration-tests/src/harness/GoldenReferenceCpu.hpp b/dnn-providers/integration-tests/src/harness/GoldenReferenceCpu.hpp index c36adc19fb62..1f21a1978487 100644 --- a/dnn-providers/integration-tests/src/harness/GoldenReferenceCpu.hpp +++ b/dnn-providers/integration-tests/src/harness/GoldenReferenceCpu.hpp @@ -17,6 +17,8 @@ #include #include +#include "harness/TomlGuards.hpp" + namespace hipdnn_integration_tests { @@ -47,12 +49,16 @@ class TestGoldenReferenceCpu : public ::testing::TestWithParam #include #include -#include -#include #include #include #include @@ -29,6 +27,9 @@ #include "harness/SharedHandle.hpp" #include "harness/SupportMatrixCollector.hpp" #include "harness/TestConfig.hpp" +#include "harness/TomlGuards.hpp" +#include "harness/input_init/SynthesizeInputs.hpp" +#include "harness/tolerance/ToleranceResolver.hpp" namespace hipdnn_integration_tests { @@ -41,7 +42,6 @@ template class IntegrationGraphVerificationHarness : public ::testing::TestWithParam { protected: - int _deviceId = 0; std::string _testCaseNote; std::string _testCaseLayout; std::unordered_map _tensorIdToNameMap; @@ -53,20 +53,10 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParamcurrent_test_info(); info != nullptr) - { - const std::string testName = std::string(info->test_suite_name()) + "." + info->name(); - if(auto skipReason = TestConfig::get().findSkipForTest(testName)) - { - GTEST_SKIP() << "[arch " << TestConfig::get().getCurrentArch() << "] " - << *skipReason; - } - } + // HIP initializes lazily on first runtime use; the shared hipdnn handle + // (getSharedHandle -> hipdnnCreate) does this before any graph executes, + // so no explicit hipInit is needed here. + skipIfTomlMatched(currentTestName()); } void setTestCaseNote(std::string note) @@ -81,41 +71,64 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParam& output) { ToleranceMode mode = TestConfig::get().getToleranceMode(); + if(mode != ToleranceMode::DEFAULT) + { + ADD_FAILURE() << "getTolerance: unhandled tolerance mode"; + return 0.0f; + } - if(mode == ToleranceMode::DEFAULT) + auto [serialized, serErr] = graph.to_binary(); + if(serErr.code != hipdnn_frontend::ErrorCode::OK || serialized.empty()) { - // We determine the tolerance based on the last non-PointwiseNode - // (the root op). This will be gradually updated to use dynamic - // calculation as possible; eventually, the tolerance will be - // entirely dynamically determined in the default case. - // - // NOTE: after validate(), the graph's sub-nodes are in topological order. - const hipdnn_frontend::graph::INode* rootOp = nullptr; - graph.visit([&](const hipdnn_frontend::graph::INode& node) { - if(dynamic_cast(&node) == nullptr - && dynamic_cast(&node) == nullptr) - { - rootOp = &node; - } - }); + ADD_FAILURE() << "getTolerance: graph serialization failed"; + return 0.0f; + } - if(rootOp == nullptr) - { - ADD_FAILURE() << "getTolerance: no root op found in graph"; - return 0.0f; - } + const auto wrapper + = hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper::fromSerializedBlob( + serialized.data(), serialized.size()); + if(!wrapper.isValid()) + { + ADD_FAILURE() << "getTolerance: serialized graph failed verification"; + return 0.0f; + } - return toleranceForNode(*rootOp, output->get_data_type()); + const auto& tensorMap = wrapper.getTensorMap(); + const auto it = tensorMap.find(output->get_uid()); + if(it == tensorMap.end()) + { + ADD_FAILURE() << "getTolerance: output tensor uid " << output->get_uid() + << " not found in serialized graph"; + return 0.0f; } - ADD_FAILURE() << "getTolerance: unhandled tolerance mode"; - return 0.0f; + float atol = 0.0f; + float rtol = 0.0f; + tolerance::resolveTolerance(wrapper, + it->second->data_type(), + currentTestName(), + atol, + rtol, + tolerance::outputOpTolerance); + // getTolerance's single-float contract predates split atol/rtol; under the + // current resolver the two are equal (same default, same override). + return atol; } void verifyGraph(hipdnn_frontend::graph::Graph& graph, unsigned int seed) @@ -201,6 +214,9 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParamcurrent_test_info(); - if(testInfo != nullptr) - { - std::string testName - = std::string(testInfo->test_suite_name()) + "." + testInfo->name(); - auto override = TestConfig::get().findToleranceOverride(testName); - if(override.has_value()) - { - finalAtol = override->atol; - finalRtol = override->rtol; - HIPDNN_PLUGIN_LOG_INFO("Tolerance override applied for " << testName - << ": atol=" << finalAtol - << " rtol=" << finalRtol); - } - } + // Tolerances arrive already resolved (default + TOML override) from + // getTolerance via ToleranceResolver; no override is applied here. + const float finalAtol = absoluteTolerance; + const float finalRtol = relativeTolerance; // Since the graph can infer properties + Ids, we defer validator registration until right // before validation in verifyGraph @@ -332,67 +333,82 @@ class IntegrationGraphVerificationHarness : public ::testing::TestWithParamnodes() == nullptr) + { + initializeBundleFallback(bundle, seed); + return; + } + + std::vector leafInputUids; + InputTensorMap inputs; + for(auto& [uid, tensor] : bundle.tensors) + { + if(!bundle.isOutput(uid)) { - bundle.randomizeTensor(tensorPair.first, -1.0f, 1.0f, seed); + leafInputUids.push_back(uid); + inputs[uid] = std::move(tensor); } } - } - static float toleranceForNode(const hipdnn_frontend::graph::INode& node, - hipdnn_frontend::DataType dataType) - { - switch(dataType) + std::mt19937 rng(seed); + SynthesisTracker tracker(leafInputUids, inputs); + + bool synthesisOk = true; + for(const auto* node : *fb->nodes()) { - case hipdnn_frontend::DataType::FLOAT: - return toleranceForNodeTyped(node); - case hipdnn_frontend::DataType::HALF: - return toleranceForNodeTyped(node); - case hipdnn_frontend::DataType::BFLOAT16: - return toleranceForNodeTyped(node); - default: - ADD_FAILURE() << "toleranceForNode: unsupported data type"; - return 0.0f; + if(node == nullptr) + { + continue; + } + auto result = synthesizeNodeInputs(*node, tracker, rng); + if(!result.filled) + { + synthesisOk = false; + break; + } + } + + if(synthesisOk) + { + auto finalResult = tracker.finish("synthesis"); + synthesisOk = finalResult.filled; + } + + for(auto& [uid, tensor] : inputs) + { + bundle.tensors[uid] = std::move(tensor); + } + + if(!synthesisOk) + { + initializeBundleFallback(bundle, seed); } } - template - static float toleranceForNodeTyped(const hipdnn_frontend::graph::INode& node) + void initializeBundleFallback(hipdnn_test_sdk::utilities::GraphTensorBundle& bundle, + unsigned int seed) { - namespace fe = hipdnn_frontend::graph; - using namespace hipdnn_test_sdk::utilities; - - if(dynamic_cast(&node) != nullptr) - return static_cast(conv::getToleranceFwd()); - if(dynamic_cast(&node) != nullptr) - return static_cast(conv::getToleranceBwd()); - if(dynamic_cast(&node) != nullptr) - return static_cast(conv::getToleranceWrw()); - if(dynamic_cast(&node) != nullptr) - return static_cast(batchnorm::getToleranceInferenceWithVariance()); - if(dynamic_cast(&node) != nullptr) - return static_cast(batchnorm::getToleranceInference()); - if(dynamic_cast(&node) != nullptr) - return static_cast(batchnorm::getToleranceTraining()); - if(dynamic_cast(&node) != nullptr) - return static_cast(batchnorm::getToleranceBackward()); - if(dynamic_cast(&node) != nullptr) - return static_cast(matmul::getTolerance()); - if(dynamic_cast(&node) != nullptr) - return static_cast(reduction::getTolerance()); - if(dynamic_cast(&node) != nullptr) - return static_cast(rmsnorm::getTolerance()); - - ADD_FAILURE() << "toleranceForNodeTyped: unsupported node type"; - return 0.0f; + for(auto& [uid, tensor] : bundle.tensors) + { + if(!bundle.isOutput(uid)) + { + bundle.randomizeTensor(uid, -1.0f, 1.0f, seed); + } + } } void executeGpuGraph(hipdnnHandle_t handle, diff --git a/dnn-providers/integration-tests/src/harness/ReferenceCapabilityError.hpp b/dnn-providers/integration-tests/src/harness/ReferenceCapabilityError.hpp new file mode 100644 index 000000000000..869664ba843e --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/ReferenceCapabilityError.hpp @@ -0,0 +1,40 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +namespace hipdnn_integration_tests +{ + +// Signals "this reference executor has no plan for this op" — a CAPABILITY MISS, +// not a runtime failure. The golden-verification harness distinguishes three +// reference outcomes: +// +// A capability miss — ref cannot run this op -> ReferenceCapabilityError +// B disagreement — ref ran, output != engine-> mismatch at compare time +// C runtime error — ref CAN run it but threw -> any other std::exception +// +// In `auto` mode a case-A miss falls through to the next reference; in explicit +// gpu/cpu mode it SKIPs. A case-C error is loud (auto: fall through + loud +// report; explicit / end-of-auto: FAIL). Throwing the right type at the source +// is what lets the harness tell A from C. +// +// Deriving from std::runtime_error keeps existing `catch(const std::exception&)` +// / `catch(const std::runtime_error&)` call sites working unchanged. +// +// NOTE: the GPU reference executor (our code) throws this directly at its +// capability-miss sites. The CPU reference executor lives in the shared test_sdk +// library and throws a plain std::runtime_error for BOTH A and C; the +// CpuReferenceGraphExecutorAdapter translates that into a ReferenceCapabilityError +// (it cannot tell A from C, so it conservatively treats every test_sdk throw as a +// capability miss and carries the original message for the report). +class ReferenceCapabilityError : public std::runtime_error +{ +public: + using std::runtime_error::runtime_error; +}; + +} // namespace hipdnn_integration_tests diff --git a/dnn-providers/integration-tests/src/harness/TestConfig.hpp b/dnn-providers/integration-tests/src/harness/TestConfig.hpp index e9ed9b27c106..8abcd8b1adae 100644 --- a/dnn-providers/integration-tests/src/harness/TestConfig.hpp +++ b/dnn-providers/integration-tests/src/harness/TestConfig.hpp @@ -35,6 +35,84 @@ enum class ReferenceExecutorType GPU, }; +// How a bundle's engine output is verified (RFC 0010 §4.4). This governs the +// BUNDLE tests only and is independent of ReferenceExecutorType (which governs +// the parameterized tests' choice of which ref executor to exercise). +// +// AUTO — per-test fallback: golden -> GPU ref -> CPU ref -> SKIP+report +// GOLDEN — golden data only; SKIP if a bundle has no golden outputs +// GPU — ignore golden; compare engine against the GPU reference executor +// CPU — ignore golden; compare engine against the CPU reference executor +enum class VerificationMode +{ + AUTO, + GOLDEN, + GPU, + CPU, +}; + +// Parse a verification-mode string (case-insensitive) into the enum. Throws +// std::runtime_error on an unrecognized value. Shared by the CLI flag parser and +// the env-var fallback so both accept exactly the same spellings. +inline VerificationMode parseVerificationMode(std::string value) +{ + std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + if(value == "auto") + { + return VerificationMode::AUTO; + } + if(value == "golden") + { + return VerificationMode::GOLDEN; + } + if(value == "gpu") + { + return VerificationMode::GPU; + } + if(value == "cpu") + { + return VerificationMode::CPU; + } + throw std::runtime_error("Invalid verification mode '" + value + + "'; expected 'auto', 'golden', 'gpu', or 'cpu'"); +} + +// Resolve verification mode: CLI value wins, then env var, then nullopt (caller +// defaults to AUTO). Factored out of TestConfig::initialize() so the resolution +// logic is independently testable. +inline std::optional + resolveVerificationMode(std::optional cliValue) +{ + if(cliValue.has_value()) + { + return cliValue; + } + auto envVal = hipdnn_data_sdk::utilities::getEnv("HIPDNN_TEST_VERIFICATION_MODE"); + if(!envVal.empty()) + { + return parseVerificationMode(envVal); + } + return std::nullopt; +} + +// Resolve golden data dir: CLI value wins, then env var, then nullopt. +inline std::optional + resolveGoldenDataDir(std::optional cliValue) +{ + if(cliValue.has_value()) + { + return cliValue; + } + auto envVal = hipdnn_data_sdk::utilities::getEnv("HIPDNN_TEST_GOLDEN_DATA_DIR"); + if(!envVal.empty()) + { + return std::filesystem::path(envVal); + } + return std::nullopt; +} + // Singleton class for storing CLI-based test configuration. // All arguments are independently optional: // - articlePath: omit to use hipDNN's default plugin discovery @@ -64,7 +142,8 @@ class TestConfig std::optional referenceExecutorType = std::nullopt, bool allowBundles = false, - std::optional goldenDataDir = std::nullopt) + std::optional goldenDataDir = std::nullopt, + std::optional verificationMode = std::nullopt) { TestConfig& instance = get(); if(instance._initialized) @@ -118,15 +197,8 @@ class TestConfig } } - instance._goldenDataDir = std::move(goldenDataDir); - if(!instance._goldenDataDir.has_value()) - { - auto envVal = hipdnn_data_sdk::utilities::getEnv("HIPDNN_TEST_GOLDEN_DATA_DIR"); - if(!envVal.empty()) - { - instance._goldenDataDir = std::filesystem::path(envVal); - } - } + instance._goldenDataDir = resolveGoldenDataDir(std::move(goldenDataDir)); + instance._verificationMode = resolveVerificationMode(verificationMode); // Detect device 0's gfx arch and VRAM once at startup. Used by // [[test_skips]] and golden-ref metadata guards (arch/VRAM checks). @@ -291,6 +363,14 @@ class TestConfig return _goldenDataDir.value(); } + // Bundle verification mode. Resolved once at init: CLI flag > + // HIPDNN_TEST_VERIFICATION_MODE env var > AUTO default. + VerificationMode getVerificationMode() const + { + throwIfNotInitialized(); + return _verificationMode.value_or(VerificationMode::AUTO); + } + private: TestConfig() = default; @@ -307,6 +387,7 @@ class TestConfig std::optional _testSettings; std::optional _referenceExecutorType; std::optional _goldenDataDir; + std::optional _verificationMode; std::string _currentArch; std::size_t _currentDeviceVramMb = 0; std::string _currentPlatform; diff --git a/dnn-providers/integration-tests/src/harness/TomlGuards.hpp b/dnn-providers/integration-tests/src/harness/TomlGuards.hpp new file mode 100644 index 000000000000..ccc098e663a8 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/TomlGuards.hpp @@ -0,0 +1,63 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include + +#include + +#include "harness/TestConfig.hpp" + +namespace hipdnn_integration_tests +{ + +inline std::string currentTestName() +{ + auto* info = ::testing::UnitTest::GetInstance()->current_test_info(); + if(info == nullptr) + { + return {}; + } + return std::string(info->test_suite_name()) + "." + info->name(); +} + +inline bool applyTomlToleranceOverride(const std::string& testName, float& atol, float& rtol) +{ + if(testName.empty()) + { + return false; + } + auto ovr = TestConfig::get().findToleranceOverride(testName); + if(!ovr) + { + return false; + } + atol = ovr->atol; + rtol = ovr->rtol; + HIPDNN_PLUGIN_LOG_INFO("Tolerance override applied for " << testName << ": atol=" << atol + << " rtol=" << rtol); + return true; +} + +inline std::optional checkTomlSkip(const std::string& testName) +{ + if(testName.empty()) + { + return std::nullopt; + } + return TestConfig::get().findSkipForTest(testName); +} + +inline void skipIfTomlMatched(const std::string& testName) +{ + if(auto reason = checkTomlSkip(testName)) + { + GTEST_SKIP() << "[arch " << TestConfig::get().getCurrentArch() << "] " << *reason; + } +} + +} // namespace hipdnn_integration_tests diff --git a/dnn-providers/integration-tests/src/harness/golden/BundleDiscovery.hpp b/dnn-providers/integration-tests/src/harness/bundle/BundleDiscovery.hpp similarity index 96% rename from dnn-providers/integration-tests/src/harness/golden/BundleDiscovery.hpp rename to dnn-providers/integration-tests/src/harness/bundle/BundleDiscovery.hpp index 56ea6dc254ff..800c1288d4ea 100644 --- a/dnn-providers/integration-tests/src/harness/golden/BundleDiscovery.hpp +++ b/dnn-providers/integration-tests/src/harness/bundle/BundleDiscovery.hpp @@ -14,7 +14,7 @@ #include -namespace hipdnn_integration_tests::golden +namespace hipdnn_integration_tests::bundle { // Naming types, kept together. DerivedTestName is the output of deriveTestName() @@ -191,9 +191,11 @@ inline DerivedTestName deriveTestName(const std::filesystem::path& jsonPath, if(relativeDir.empty()) { - throw std::runtime_error( - "Bundle .json must live in a sub-folder of the data root, not at the root itself: " - + jsonPath.string() + "; expected {folder}/{file}.json"); + // --gd points directly at a bundle folder (the .json is at the root). + // Use the folder name as the suite so "--gd .../graph_only_bundle" works. + const std::string suite = sanitizeForGtest(bundleDir.filename().string()); + const std::string test = sanitizeForGtest(jsonPath.stem().string()); + return {suite, test}; } std::string suite; @@ -270,4 +272,4 @@ inline std::vector discoverBundles(const std::filesystem::path return bundles; } -} // namespace hipdnn_integration_tests::golden +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/golden/BundleRegistration.hpp b/dnn-providers/integration-tests/src/harness/bundle/BundleRegistration.hpp similarity index 94% rename from dnn-providers/integration-tests/src/harness/golden/BundleRegistration.hpp rename to dnn-providers/integration-tests/src/harness/bundle/BundleRegistration.hpp index 5f45fc361f5a..6cfbc4e74322 100644 --- a/dnn-providers/integration-tests/src/harness/golden/BundleRegistration.hpp +++ b/dnn-providers/integration-tests/src/harness/bundle/BundleRegistration.hpp @@ -15,10 +15,10 @@ #include #include "harness/TestConfig.hpp" -#include "harness/golden/BundleDiscovery.hpp" -#include "harness/golden/IntegrationGraphGoldenReferenceVerificationHarness.hpp" +#include "harness/bundle/BundleDiscovery.hpp" +#include "harness/bundle/IntegrationBundleVerificationHarness.hpp" -namespace hipdnn_integration_tests::golden +namespace hipdnn_integration_tests::bundle { namespace detail @@ -58,7 +58,7 @@ inline void registerBundles(const std::vector& bundles) __FILE__, __LINE__, [loaded = bundle.bundle, path = bundle.jsonPath]() -> ::testing::Test* { - auto* test = new IntegrationGraphGoldenReferenceVerificationHarness( + auto* test = new IntegrationBundleVerificationHarness( /*requiresDevice=*/true); test->setBundle(loaded, path); return test; @@ -159,7 +159,7 @@ inline void registerBundleTests() detail::registerBundles(bundles); - HIPDNN_PLUGIN_LOG_INFO("Registered " << bundles.size() << " golden bundle test(s)"); + HIPDNN_PLUGIN_LOG_INFO("Registered " << bundles.size() << " bundle test(s)"); } -} // namespace hipdnn_integration_tests::golden +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.cpp b/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.cpp new file mode 100644 index 000000000000..c5ebb2507f31 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.cpp @@ -0,0 +1,660 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "harness/bundle/IntegrationBundleVerificationHarness.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "harness/CpuReferenceGraphExecutorAdapter.hpp" +#include "harness/ReferenceCapabilityError.hpp" +#include "harness/SharedHandle.hpp" +#include "harness/TestConfig.hpp" +#include "harness/TomlGuards.hpp" +#include "harness/bundle/UnverifiableBundleReport.hpp" +#include "harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp" +#include "harness/input_init/SynthesizeInputs.hpp" +#include "harness/tolerance/ToleranceResolver.hpp" + +namespace hipdnn_integration_tests::bundle +{ + +// ---- virtual defaults ------------------------------------------------------ + +void IntegrationBundleVerificationHarness::executeGraphThroughEngine( + std::unordered_map& variantPack) +{ + auto handle = getSharedHandle(); + + const std::vector graphBytes( + _bundle->graphBuffer.data(), _bundle->graphBuffer.data() + _bundle->graphBuffer.size()); + + hipdnn_frontend::graph::Graph graph; + auto err = graph.from_binary(handle, graphBytes); + ASSERT_TRUE(err.is_good()) << "from_binary failed: " << err.get_message(); + + std::vector engineIds; + auto status = graph.get_ranked_engine_ids(engineIds); + + const auto graphSummary = [&] { + return std::to_string(_bundle->outputTensorUids.size()) + " output tensor(s), " + + std::to_string(engineIds.size()) + " ranked engine(s)"; + }; + + if(TestConfig::get().hasEngineName()) + { + int64_t targetEngineId = TestConfig::get().getEngineId(); + if(status.is_bad() + || std::find(engineIds.begin(), engineIds.end(), targetEngineId) == engineIds.end()) + { + throw std::runtime_error("Engine " + std::string(TestConfig::get().getEngineName()) + + " does not support this graph (" + graphSummary() + ")"); + } + graph.set_preferred_engine_id_ext(targetEngineId); + } + else + { + if(status.is_bad() || engineIds.empty()) + { + throw std::runtime_error("No engine supports this graph (" + graphSummary() + ")"); + } + } + + auto result = graph.create_execution_plans(); + ASSERT_TRUE(result.is_good()) << result.get_message(); + result = graph.check_support(); + ASSERT_TRUE(result.is_good()) << result.get_message(); + result = graph.build_plans(); + ASSERT_TRUE(result.is_good()) << result.get_message(); + + int64_t workspaceSize = 0; + result = graph.get_workspace_size(workspaceSize); + ASSERT_TRUE(result.is_good()) << result.get_message(); + ASSERT_GE(workspaceSize, 0); + const hipdnn_data_sdk::utilities::Workspace workspace(static_cast(workspaceSize)); + + result = graph.execute(handle, variantPack, workspace.get()); + ASSERT_TRUE(result.is_good()) << result.get_message(); +} + +void IntegrationBundleVerificationHarness::runReferenceExecutor( + ReferenceExecutorType type, std::unordered_map& variantPack) +{ + auto executor = makeReferenceExecutor(type); + executor->execute(_bundle->graphBuffer.data(), _bundle->graphBuffer.size(), variantPack); +} + +std::unique_ptr + IntegrationBundleVerificationHarness::makeReferenceExecutor(ReferenceExecutorType type) +{ + switch(type) + { + case ReferenceExecutorType::CPU: + return std::make_unique(); + case ReferenceExecutorType::GPU: + return std::make_unique(); + default: + throw std::runtime_error("Unknown reference executor type"); + } +} + +// ---- top-level dispatch ---------------------------------------------------- + +VerificationMode IntegrationBundleVerificationHarness::getVerificationMode() const +{ + return TestConfig::get().getVerificationMode(); +} + +void IntegrationBundleVerificationHarness::runComparison() +{ + if(_bundle->outputTensorUids.empty()) + { + skipUnverifiable("bundle has no output tensors to compare"); + return; + } + + if(!ensureInputsAvailable()) + { + return; + } + + switch(getVerificationMode()) + { + case VerificationMode::GOLDEN: + runGoldenMode(); + return; + case VerificationMode::GPU: + runExplicitRefMode(ReferenceExecutorType::GPU); + return; + case VerificationMode::CPU: + runExplicitRefMode(ReferenceExecutorType::CPU); + return; + case VerificationMode::AUTO: + runAutoMode(); + return; + default: + FAIL() << "Unknown verification mode"; + return; + } +} + +namespace +{ +// GTEST_SKIP() expands to `return;`, so it can only be used from a void-returning +// function. This wrapper records the skip (and its message) and returns from +// itself; the skip state persists for the caller, which then returns nullopt. +void skipEngineCouldNotRun(const std::filesystem::path& bundlePath, const std::string& error) +{ + std::ostringstream msg; + msg << "Engine could not execute bundle " << bundlePath; + if(!error.empty()) + { + msg << ": " << error; + } + GTEST_SKIP() << msg.str(); +} +} // namespace + +std::optional IntegrationBundleVerificationHarness::runEngineOrSkip() +{ + std::string error; + auto engineOutputs = runEngineCapturingOutputs(error); + if(!engineOutputs && !::testing::Test::HasFatalFailure()) + { + skipEngineCouldNotRun(_bundlePath, error); + } + return engineOutputs; +} + +void IntegrationBundleVerificationHarness::runGoldenMode() +{ + if(!_bundle->hasGoldenOutputs) + { + skipUnverifiable("no golden data (verification-mode=golden)"); + return; + } + auto engineOutputs = runEngineOrSkip(); + if(!engineOutputs) + { + return; + } + compareAgainstGolden(*engineOutputs); +} + +void IntegrationBundleVerificationHarness::runExplicitRefMode(ReferenceExecutorType type) +{ + auto engineOutputs = runEngineOrSkip(); + if(!engineOutputs) + { + return; + } + + OutputTensors refOutputs; + const RefRunResult result = runReferenceCapturingOutputs(type, refOutputs); + switch(result.status) + { + case RefStatus::CAPABILITY_MISS: + skipUnverifiable(refLabel(type) + " cannot run this op: " + result.message); + return; + case RefStatus::RUNTIME_ERROR: + recordRefError(refLabel(type) + " errored: " + result.message); + FAIL() << refLabel(type) << " errored (verification-mode=" << refLabel(type) + << "): " << result.message; + return; + case RefStatus::RAN: + compareOutputs(*engineOutputs, refOutputs); + return; + default: + FAIL() << "Unknown RefStatus"; + return; + } +} + +void IntegrationBundleVerificationHarness::runAutoMode() +{ + auto engineOutputs = runEngineOrSkip(); + if(!engineOutputs) + { + return; + } + + if(_bundle->hasGoldenOutputs) + { + compareAgainstGolden(*engineOutputs); + return; + } + + // GPU ref (non-final): capability miss or runtime error -> fall through. + bool gpuRefErrored = false; + { + OutputTensors refOutputs; + const RefRunResult gpu + = runReferenceCapturingOutputs(ReferenceExecutorType::GPU, refOutputs); + if(gpu.status == RefStatus::RAN) + { + compareOutputs(*engineOutputs, refOutputs); + return; + } + if(gpu.status == RefStatus::RUNTIME_ERROR) + { + gpuRefErrored = true; + recordRefError("GPU reference errored (auto mode, falling through to CPU): " + + gpu.message); + } + } + + // CPU ref (final): capability miss -> unverifiable; runtime error -> FAIL. + { + OutputTensors refOutputs; + const RefRunResult cpu + = runReferenceCapturingOutputs(ReferenceExecutorType::CPU, refOutputs); + switch(cpu.status) + { + case RefStatus::CAPABILITY_MISS: + skipUnverifiable(gpuRefErrored + ? "no usable reference (golden absent; GPU ref errored, CPU ref " + "cannot run this op; see reference-error report): " + + cpu.message + : "no reference available (golden absent; GPU and CPU ref " + "cannot run this op): " + + cpu.message); + return; + case RefStatus::RUNTIME_ERROR: + recordRefError("CPU reference errored (auto mode, last resort): " + cpu.message); + FAIL() << "CPU reference errored (auto mode, last resort): " << cpu.message; + return; + case RefStatus::RAN: + compareOutputs(*engineOutputs, refOutputs); + return; + default: + FAIL() << "Unknown RefStatus"; + return; + } + } +} + +// ---- inputs ---------------------------------------------------------------- + +bool IntegrationBundleVerificationHarness::ensureInputsAvailable() +{ + if(_bundle->tensors.has_value()) + { + return true; + } + return synthesizeInputs(); +} + +bool IntegrationBundleVerificationHarness::synthesizeInputs() +{ + const auto wrapper = _bundle->graphWrapper(); + const auto& tensorAttrMap = wrapper.getTensorMap(); + const std::set outputUids(_bundle->outputTensorUids.begin(), + _bundle->outputTensorUids.end()); + + InputTensorMap inputs; + std::vector allLeafInputUids; + for(const auto& [uid, attrs] : tensorAttrMap) + { + if(attrs->virtual_() || outputUids.count(uid) != 0) + { + continue; + } + inputs[uid] = hipdnn_test_sdk::detail::createTensorFromAttribute(*attrs); + inputs[uid]->fillTensorWithValue(0.f); + allLeafInputUids.push_back(uid); + } + + std::mt19937 rng( + static_cast(_bundle->metadata.seed.value_or(K_DEFAULT_SEED))); + + SynthesisTracker tracker(allLeafInputUids, inputs); + for(uint32_t i = 0; i < wrapper.nodeCount(); ++i) + { + const auto& node = wrapper.getNode(i); + const SynthesisResult outcome = synthesizeNodeInputs(node, tracker, rng); + if(!outcome.filled) + { + skipUnverifiable(outcome.reason); + return false; + } + } + + const SynthesisResult finalResult = tracker.finish("synthesis"); + if(!finalResult.filled) + { + skipUnverifiable(finalResult.reason); + return false; + } + + _bundle->tensors = std::move(inputs); + return true; +} + +// ---- engine + reference runs ----------------------------------------------- + +// Output buffers are filled with a sentinel (NaN for float types, type max for +// integer types) rather than zero. This is the standard hipdnn practice — see +// CpuReferenceGraphExecutor and GraphTensorBundle::sentinelFillOutputTensors — +// and it arms allClose's NaN/sentinel guard: any output element the executor +// fails to write stays NaN and is caught as a hard failure. Zero-filling would +// make an unwritten output indistinguishable from a legitimately-computed zero, +// so engine and reference could silently agree on garbage (both untouched zeros) +// and the comparison would vacuously pass. +OutputTensors IntegrationBundleVerificationHarness::allocateSentinelOutputs() const +{ + const auto wrapper = _bundle->graphWrapper(); + const auto& tensorAttrMap = wrapper.getTensorMap(); + + OutputTensors outputs; + for(const int64_t uid : _bundle->outputTensorUids) + { + outputs[uid] = hipdnn_test_sdk::detail::createTensorFromAttribute(*tensorAttrMap.at(uid)); + outputs[uid]->fillWithSentinelValue(); + } + return outputs; +} + +std::unordered_map + IntegrationBundleVerificationHarness::buildVariantPack(OutputTensors& outputs, + bool useDevice) const +{ + std::unordered_map variantPack; + const std::set outputUids(_bundle->outputTensorUids.begin(), + _bundle->outputTensorUids.end()); + + for(auto& [uid, tensor] : *_bundle->tensors) + { + if(outputUids.count(uid) != 0) + { + continue; + } + variantPack[uid] = useDevice ? tensor->rawDeviceData() : tensor->rawHostData(); + } + for(auto& [uid, tensor] : outputs) + { + variantPack[uid] = useDevice ? tensor->rawDeviceData() : tensor->rawHostData(); + } + return variantPack; +} + +std::optional + IntegrationBundleVerificationHarness::runEngineCapturingOutputs(std::string& error) +{ + OutputTensors engineOutputs = allocateSentinelOutputs(); + auto variantPack = buildVariantPack(engineOutputs, /*useDevice=*/_requiresDevice); + + bool threw = false; + try + { + executeGraphThroughEngine(variantPack); + } + catch(const std::exception& e) + { + threw = true; + error = e.what(); + } + + if(::testing::Test::HasFatalFailure()) + { + return std::nullopt; + } + if(threw) + { + return std::nullopt; + } + + markOutputsModified(engineOutputs); + return engineOutputs; +} + +IntegrationBundleVerificationHarness::RefRunResult + IntegrationBundleVerificationHarness::runReferenceCapturingOutputs(ReferenceExecutorType type, + OutputTensors& refOutputs) +{ + refOutputs = allocateSentinelOutputs(); + const bool useDevice = _requiresDevice && (type == ReferenceExecutorType::GPU); + auto variantPack = buildVariantPack(refOutputs, useDevice); + + try + { + runReferenceExecutor(type, variantPack); + } + catch(const ReferenceCapabilityError& e) + { + return {RefStatus::CAPABILITY_MISS, e.what()}; + } + catch(const std::exception& e) + { + return {RefStatus::RUNTIME_ERROR, e.what()}; + } + + markOutputsModifiedFor(refOutputs, useDevice); + return {RefStatus::RAN, {}}; +} + +void IntegrationBundleVerificationHarness::markOutputsModified(OutputTensors& outputs) const +{ + markOutputsModifiedFor(outputs, _requiresDevice); +} + +void IntegrationBundleVerificationHarness::markOutputsModifiedFor(OutputTensors& outputs, + bool device) +{ + for(auto& [uid, tensor] : outputs) + { + if(device) + { + tensor->markDeviceModified(); + } + else + { + tensor->markHostModified(); + } + } +} + +// ---- comparison ------------------------------------------------------------ + +void IntegrationBundleVerificationHarness::compareAgainstGolden(OutputTensors& engineOutputs) +{ + compareEach(engineOutputs, [&](int64_t uid) -> hipdnn_data_sdk::utilities::ITensor& { + return *_bundle->tensors->at(uid); + }); +} + +void IntegrationBundleVerificationHarness::compareOutputs(OutputTensors& engineOutputs, + OutputTensors& expected) +{ + compareEach(engineOutputs, [&](int64_t uid) -> hipdnn_data_sdk::utilities::ITensor& { + return *expected.at(uid); + }); +} + +template +void IntegrationBundleVerificationHarness::compareEach(OutputTensors& engineOutputs, + ExpectedLookup expectedFor) +{ + auto wrapper = _bundle->graphWrapper(); + const auto& tensorAttrMap = wrapper.getTensorMap(); + + tolerance::warnIfMultipleOutputs(_bundle->outputTensorUids.size(), + "IntegrationBundleVerificationHarness"); + + for(const int64_t uid : _bundle->outputTensorUids) + { + auto& actualTensor = *engineOutputs.at(uid); + auto& expectedTensor = expectedFor(uid); + + auto* attrs = tensorAttrMap.at(uid); + const auto dataType = attrs->data_type(); + + float atol = 0.0f; + float rtol = 0.0f; + tolerance::resolveTolerance(wrapper, dataType, currentTestName(), atol, rtol); + + compareOutputTensor(uid, *attrs, dataType, expectedTensor, actualTensor, atol, rtol); + } +} + +// ---- reporting helpers ----------------------------------------------------- + +void IntegrationBundleVerificationHarness::skipUnverifiable(const std::string& reason) +{ + UnverifiableBundleReport::get().record( + _bundlePath.string(), reason, UnverifiableSeverity::UNVERIFIABLE); + GTEST_SKIP() << "Unverifiable: " << reason << " (" << _bundlePath << ")"; +} + +void IntegrationBundleVerificationHarness::recordRefError(const std::string& reason) +{ + UnverifiableBundleReport::get().record( + _bundlePath.string(), reason, UnverifiableSeverity::REF_ERROR); +} + +std::string IntegrationBundleVerificationHarness::refLabel(ReferenceExecutorType type) +{ + return type == ReferenceExecutorType::GPU ? "GPU reference" : "CPU reference"; +} + +// ---- comparison + tolerance machinery -------------------------------------- + +void IntegrationBundleVerificationHarness::compareOutputTensor( + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol) const +{ + auto validator = hipdnn_test_sdk::utilities::createAllCloseValidator(dataType, atol, rtol); + const bool passed = validator->allClose(expected, actual); + + if(!passed) + { + std::ostringstream report; + report << reportHeader(uid, attrs, dataType, expected, atol, rtol); + writeTensorDiffReport(report, uid, attrs, dataType, expected, actual, atol, rtol); + EXPECT_TRUE(false) << report.str(); + } +} + +void IntegrationBundleVerificationHarness::writeTensorDiffReport( + std::ostream& os, + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol) +{ + using DT = hipdnn_flatbuffers_sdk::data_objects::DataType; + using hipdnn_data_sdk::types::bfloat16; + using hipdnn_data_sdk::types::half; + + switch(dataType) + { + case DT::FLOAT: + writeFpDiffReport(os, uid, attrs, expected, actual, atol, rtol); + return; + case DT::HALF: + writeFpDiffReport(os, uid, attrs, expected, actual, atol, rtol); + return; + case DT::BFLOAT16: + writeFpDiffReport(os, uid, attrs, expected, actual, atol, rtol); + return; + case DT::DOUBLE: + writeFpDiffReport(os, uid, attrs, expected, actual, atol, rtol); + return; + default: + os << " (no element-wise diff available for this data type)\n"; + } +} + +template +void IntegrationBundleVerificationHarness::writeFpDiffReport( + std::ostream& os, + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol) +{ + const auto summary + = hipdnn_test_sdk::utilities::computeTensorDiff(expected, actual, atol, rtol); + hipdnn_test_sdk::utilities::printTensorDiffSummary(os, labelFor(uid, attrs), summary); +} + +std::string IntegrationBundleVerificationHarness::labelFor( + int64_t uid, const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs) +{ + const auto* name = attrs.name(); + return (name != nullptr && !name->empty()) ? name->str() : ("uid=" + std::to_string(uid)); +} + +std::string IntegrationBundleVerificationHarness::reportHeader( + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + float atol, + float rtol) const +{ + std::ostringstream os; + os << "\nBundle output comparison FAILED\n" + << " Bundle: " << _bundlePath << "\n" + << " Tensor: " << labelFor(uid, attrs) << " (UID " << uid << ", output)\n" + << " Shape: " << hipdnn_test_sdk::utilities::StreamVec(expected.dims()) << " " + << dataTypeName(dataType) << "\n" + << " Tolerance: atol=" << atol << " rtol=" << rtol << "\n"; + return os.str(); +} + +std::string IntegrationBundleVerificationHarness::dataTypeName( + hipdnn_flatbuffers_sdk::data_objects::DataType dataType) +{ + return hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(dataType); +} + +void IntegrationBundleVerificationHarness::applyMetadataGuards() const +{ + // VRAM is an execution-feasibility guard: the engine allocates the same + // buffers and runs the same graph regardless of how its output is verified, + // so this applies to every bundle (golden or reference-verified). + if(auto reason = hipdnn_test_sdk::utilities::checkVramRequirement( + _bundle->metadata, TestConfig::get().getCurrentDeviceVramMb())) + { + GTEST_SKIP() << *reason; + } + + // Arch-lock only matters for golden data: golden output VALUES are + // numerically arch-specific (AITER / GPU-ref generated). Inputs are not + // arch-locked. When there is no golden data the engine output is verified + // against a reference executor run on THIS device, so the bundle's recorded + // arch is irrelevant and must not gate the test. + if(_bundle->hasGoldenOutputs) + { + if(auto reason = hipdnn_test_sdk::utilities::checkArchCompatibility( + _bundle->metadata, TestConfig::get().getCurrentArch())) + { + GTEST_SKIP() << *reason; + } + } +} + +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.hpp b/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.hpp new file mode 100644 index 000000000000..65a28997d2b0 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/bundle/IntegrationBundleVerificationHarness.hpp @@ -0,0 +1,257 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "harness/IReferenceGraphExecutor.hpp" +#include "harness/TestConfig.hpp" +#include "harness/TomlGuards.hpp" +#include "harness/bundle/IntegrationTestBundle.hpp" + +namespace hipdnn_integration_tests::bundle +{ + +// Output tensors, keyed by uid. Used both for the engine's computed "actual" +// outputs and for an expected source (golden from disk, or a reference executor's +// output). Each set is a distinct allocation so engine and reference never write +// the same buffers. +using OutputTensors + = std::unordered_map>; + +// Verifies a bundle's engine output against an expected source chosen by the +// verification mode (RFC 0010 §4.4): +// +// actual = the engine (the system under test), run once into fresh buffers. +// expected = golden data from disk, OR a reference executor's output. +// +// Auto mode fallback chain: golden → GPU ref → CPU ref → SKIP. +// When golden outputs are present on disk, the comparison uses them directly +// and no reference executor is run at all. +// +// Memory invariants for running engine + a reference off the same inputs: +// * INPUT tensors are read-only by both executors and are NEVER mark*Modified(). +// The engine's rawDeviceData() uploads host->device (state becomes BOTH +// valid); a later CPU-ref rawHostData() therefore sees the host copy still +// valid and does NOT download — inputs stay intact across both runs. +// * OUTPUT buffers are separate ITensor objects per executor (engineOutputs vs +// refOutputs), so the two runs cannot stomp each other. Only output buffers +// are mark*Modified(). +// * Virtual (inter-node) tensors are allocated internally by each executor; the +// variant packs we build carry only real (input + output) tensors. +// +// NOTE: Stages 1-3 of init unification are done (ALMIOPEN-1969 follow-up). +// Both harnesses share SynthesisTracker + SynthesizeInputs from harness/input_init/. +// Remaining: 3 non-golden overrides kept for fused-graph range conflicts or +// specialized stress tests (BN backward activ, BN fwd training activ, +// conv backward weights large-values). +class IntegrationBundleVerificationHarness : public ::testing::Test +{ +public: + explicit IntegrationBundleVerificationHarness(bool requiresDevice) + : _requiresDevice(requiresDevice) + { + } + + void setBundle(std::shared_ptr bundle, std::filesystem::path path) + { + _bundle = std::move(bundle); + _bundlePath = std::move(path); + } + +protected: + // NOLINTNEXTLINE(readability-identifier-naming) + void SetUp() override + { + if(_requiresDevice) + { + SKIP_IF_NO_DEVICES(); + } + + if(_bundle == nullptr) + { + GTEST_SKIP() << "No bundle set"; + } + + skipIfTomlMatched(currentTestName()); + + applyMetadataGuards(); + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void TestBody() override + { + runComparison(); + } + + // Builds the graph, selects an engine, and executes. Throws on unsupported graph (→ SKIP). + virtual void executeGraphThroughEngine(std::unordered_map& variantPack); + + // Runs the named reference executor. Throws ReferenceCapabilityError on capability miss. + virtual void runReferenceExecutor(ReferenceExecutorType type, + std::unordered_map& variantPack); + + // Constructs the executor object (CpuReferenceGraphExecutorAdapter or + // GpuReferenceGraphExecutor) — does not allocate buffers or run anything. + // Skipped in auto mode when golden data is present. + virtual std::unique_ptr + makeReferenceExecutor(ReferenceExecutorType type); + + // Returns the active verification mode. Override in tests to inject a mode + // without touching the TestConfig singleton. + virtual VerificationMode getVerificationMode() const; + + // Skips the test when the bundle's metadata is incompatible with the + // current device (VRAM/arch). Virtual so isolated unit tests that don't + // exercise hardware guards can override it — production reads from the + // TestConfig singleton, which is only initialized by the real test main. + virtual void applyMetadataGuards() const; + +private: + bool _requiresDevice; + std::filesystem::path _bundlePath; + std::shared_ptr _bundle; + + static constexpr int64_t K_DEFAULT_SEED = 42; + + enum class RefStatus + { + RAN, + CAPABILITY_MISS, + RUNTIME_ERROR, + }; + struct RefRunResult + { + RefStatus status; + std::string message; + }; + + // ── top-level dispatch ──────────────────────────────────────────────── + void runComparison(); + void runGoldenMode(); + void runExplicitRefMode(ReferenceExecutorType type); + void runAutoMode(); + + // ── inputs ────────────────────────────────────────────────────────── + bool ensureInputsAvailable(); + + // Synthesizes leaf input tensors for the graph when no golden data exists. + // + // Phase 1 — allocate: walks the graph's tensor list, skips virtual + // (inter-node) and output tensors, allocates a CPU-side buffer for + // each remaining leaf input tensor (shape/dtype from TensorAttributes). + // + // Phase 2 — fill: iterates each node (internal op) and calls its + // registered fill function via synthesizeNodeInputs(). Each fill + // function reads its tensor UIDs from the node's attributes and + // declares each one as FREE (random values), STRUCTURED (needs + // specific format), or DERIVED (needs another op's output) through + // a shared SynthesisTracker. + // + // Phase 3 — verify: calls tracker.finish() which checks that every + // leaf input was accounted for by some fill function and none were + // refused (STRUCTURED/DERIVED). Returns false and SKIPs the test + // if any leaf was missed or refused. + // + // On success, moves the filled tensors into the bundle so downstream + // executors (engine, GPU ref, CPU ref) can upload them to the GPU. + bool synthesizeInputs(); + + // ── buffer allocation + execution ─────────────────────────────────── + // allocateSentinelOutputs / buildVariantPack prepare the buffers; + // runEngine* / runReference* call the executors and capture results. + // Outputs are sentinel-filled (NaN) so an unwritten output element is + // caught by allClose rather than masquerading as a computed zero. + OutputTensors allocateSentinelOutputs() const; + std::unordered_map buildVariantPack(OutputTensors& outputs, + bool useDevice) const; + // Runs the engine into fresh output buffers. Returns nullopt if the + // engine threw (its message is written to `error`) or raised a fatal + // GTest failure (in which case `error` is left empty). + std::optional runEngineCapturingOutputs(std::string& error); + + // Runs the engine and returns its outputs, or nullopt if it could not + // run. On nullopt the caller must simply return: this has already + // issued the appropriate verdict (a fatal failure propagates as-is, + // otherwise the test is SKIPped). Shared preamble for all three modes. + std::optional runEngineOrSkip(); + + RefRunResult runReferenceCapturingOutputs(ReferenceExecutorType type, + OutputTensors& refOutputs); + void markOutputsModified(OutputTensors& outputs) const; + static void markOutputsModifiedFor(OutputTensors& outputs, bool device); + + // ── tolerances ────────────────────────────────────────────────────── + // Default tolerance derivation (max-across-nodes, per-op/per-dtype lookup) + // is shared with the graph harness via harness/tolerance/ToleranceResolver.hpp; + // the TOML per-test override is layered on top in compareEach. + + // ── comparison ────────────────────────────────────────────────────── + void compareAgainstGolden(OutputTensors& engineOutputs); + void compareOutputs(OutputTensors& engineOutputs, OutputTensors& expected); + + template + void compareEach(OutputTensors& engineOutputs, ExpectedLookup expectedFor); + + void compareOutputTensor(int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol) const; + + // ── reporting ─────────────────────────────────────────────────────── + void skipUnverifiable(const std::string& reason); + void recordRefError(const std::string& reason); + static std::string refLabel(ReferenceExecutorType type); + + static std::string + labelFor(int64_t uid, const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs); + + std::string reportHeader(int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + float atol, + float rtol) const; + + static std::string dataTypeName(hipdnn_flatbuffers_sdk::data_objects::DataType dataType); + + static void + writeTensorDiffReport(std::ostream& os, + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_flatbuffers_sdk::data_objects::DataType dataType, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol); + + template + static void + writeFpDiffReport(std::ostream& os, + int64_t uid, + const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, + hipdnn_data_sdk::utilities::ITensor& expected, + hipdnn_data_sdk::utilities::ITensor& actual, + float atol, + float rtol); +}; + +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/bundle/IntegrationTestBundle.hpp b/dnn-providers/integration-tests/src/harness/bundle/IntegrationTestBundle.hpp new file mode 100644 index 000000000000..9bf691ae06e3 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/bundle/IntegrationTestBundle.hpp @@ -0,0 +1,328 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace hipdnn_integration_tests::bundle +{ + +// Loaded tensors keyed by tensor UID. Holds every tensor declared by the graph — +// inputs carry their data, output tensors carry their expected (golden) values as +// loaded from the .bin blobs. The harness saves the outputs as golden and zeroes +// them before execution. +using TensorMap = std::unordered_map>; + +// One test's worth of bundle data loaded from disk. +// +// graphBuffer — the parsed graph, as a flatbuffer. Always present in a +// loaded bundle; the engine deserializes it (from_binary) +// and the harness walks it (GraphWrapper) for dtypes and +// tolerances. A bundle that cannot even produce a graph is a +// LoadError, not a bundle. +// metadata — .meta.json contents (VRAM / arch guards). Mandatory ONLY +// for golden bundles (those shipping output .bin blobs); +// metadata validates golden data, so a bundle without it is +// a LoadError. For a no-golden bundle (graph-only, or +// inputs-only verified against a reference) absent metadata +// is valid and this is default-constructed (all fields +// empty); the optional-aware consumers fall back to defaults. +// outputTensorUids — UIDs of the graph's output tensors, derived from the +// graph. Always available (even for a graph-only bundle), +// so the harness knows which tensors to compare / allocate. +// tensors — the loaded tensor data, keyed by uid. Holds the INPUT +// tensors (with their data) whenever they are present on +// disk, plus the OUTPUT tensors carrying their golden values +// iff every output blob is present (see hasGoldenOutputs). +// Absent (nullopt) only when the input blobs themselves are +// not on disk — a true graph-only bundle. The harness may +// still synthesize inputs for such a bundle (tier 3); if it +// cannot, it SKIPs. +// hasGoldenOutputs — true iff every output tensor's .bin blob was present and +// loaded into `tensors`. When false, `tensors` (if present) +// carries inputs only — the engine output must be verified +// against a reference executor, not golden data. +struct IntegrationTestBundle +{ + flatbuffers::DetachedBuffer graphBuffer; + hipdnn_test_sdk::utilities::BundleMetadata metadata; + std::vector outputTensorUids; + std::optional tensors; + bool hasGoldenOutputs = false; + + // View over the graph flatbuffer, valid as long as this bundle lives. + hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper graphWrapper() const + { + return hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper{graphBuffer.data(), + graphBuffer.size()}; + } +}; + +// Why a load did NOT produce a bundle. These are the FAIL outcomes — an authoring +// error in the bundle itself. (A bundle that loads but lacks tensor data is a +// successfully-loaded graph-only bundle, not a LoadError; the harness SKIPs it.) +enum class LoadError +{ + MALFORMED_JSON, // the graph .json is not syntactically valid JSON + INVALID_GRAPH_SCHEMA, // valid JSON, but not a valid graph (cannot build flatbuffer) + MISSING_METADATA, // golden bundle's .meta.json companion is absent or invalid + TENSOR_LOAD_FAILED // a tensor .bin is present but failed to load (wrong size, + // unreadable, unsupported dtype, ...) +}; + +// A load either yields a bundle or explains why it could not. std::visit at the +// call site forces both cases to be handled. +using LoadResult = std::variant; + +inline const char* toString(LoadError error) +{ + switch(error) + { + case LoadError::MALFORMED_JSON: + return "graph JSON is not parseable"; + case LoadError::INVALID_GRAPH_SCHEMA: + return "graph JSON is not a valid graph"; + case LoadError::MISSING_METADATA: + return "missing or invalid .meta.json companion"; + case LoadError::TENSOR_LOAD_FAILED: + return "tensor .bin present but failed to load"; + default: + return "unknown load error"; + } +} + +namespace detail +{ + +// The on-disk blob path for a tensor: "{stem}.tensor{uid}.bin", matching the +// loader's own derivation. +inline std::filesystem::path tensorBlobPath(const std::filesystem::path& jsonPath, int64_t uid) +{ + auto basePath = jsonPath; + basePath.replace_extension(); + return {basePath.string() + ".tensor" + std::to_string(uid) + ".bin"}; +} + +// True iff every uid in `uids` has its companion .bin blob on disk. An empty +// `uids` set returns true (vacuously) — callers handle "no such tensors" +// separately (e.g. a graph with no inputs, or no outputs). +inline bool blobsPresentFor(const std::vector& uids, const std::filesystem::path& jsonPath) +{ + for(const int64_t uid : uids) + { + if(!std::filesystem::exists(tensorBlobPath(jsonPath, uid))) + { + return false; + } + } + return true; +} + +// The uids of every tensor declared in the graph's "tensors" array. Empty if the +// array is absent/empty (a graph-only bundle). Tensors without a "uid" are +// skipped (malformed entries are caught later when building the flatbuffer). +inline std::vector allTensorUids(const nlohmann::json& graphJson) +{ + std::vector uids; + if(!graphJson.contains("tensors") || !graphJson.at("tensors").is_array()) + { + return uids; + } + for(const auto& tensor : graphJson.at("tensors")) + { + if(tensor.contains("uid")) + { + uids.push_back(tensor.at("uid").get()); + } + } + return uids; +} + +} // namespace detail + +// Load a bundle from its graph .json path, classifying the outcome. +// +// This deliberately does NOT call test_sdk's loadGraphAndTensors(), whose +// all-or-nothing contract ("graph AND at least one tensor, or throw") conflicts +// with our design where a graph-only bundle is legitimate. Instead it composes +// the same test_sdk primitives (json -> flatbuffer graph, per-tensor blob load) +// under our own policy: +// +// * graph .json not parseable -> LoadError::MALFORMED_JSON (FAIL) +// * parseable but not a valid graph -> LoadError::INVALID_GRAPH_SCHEMA(FAIL) +// * golden bundle, no/invalid .meta.json-> LoadError::MISSING_METADATA (FAIL) +// * no-golden bundle, no .meta.json -> bundle, metadata default-constructed +// * valid graph, input .bin data absent -> bundle, tensors == nullopt (tier-3: +// harness may synthesize, else SKIP) +// * valid graph, .bin present but broken-> LoadError::TENSOR_LOAD_FAILED (FAIL) +// * valid graph, inputs present, +// outputs absent -> bundle, tensors set, +// hasGoldenOutputs == false (verify via ref) +// * valid graph, inputs + outputs present-> bundle, hasGoldenOutputs == true (golden) +// +// Inputs and outputs are loaded INDEPENDENTLY (partial loading): a bundle that +// ships input blobs but no output (golden) blobs is legitimate — its engine +// output is verified against a reference executor instead of golden data. Output +// uids come from getOutputTensorUidsFromGraph; everything else declared in the +// graph is treated as an input. +// +// The function is total: it never lets an exception escape. Every outcome is +// either a loaded bundle or a classified LoadError. +inline LoadResult loadIntegrationTestBundle(const std::filesystem::path& jsonPath) +{ + // 1. Read and parse the graph .json. Unreadable or unparseable -> FAIL. + std::ifstream stream(jsonPath); + if(!stream) + { + return LoadError::MALFORMED_JSON; + } + + const auto graphJson = nlohmann::json::parse(stream, nullptr, /*allow_exceptions=*/false); + if(graphJson.is_discarded()) + { + return LoadError::MALFORMED_JSON; + } + + // 2. Verify the graph by building the flatbuffer. A structurally invalid + // graph throws -> INVALID_GRAPH_SCHEMA. + flatbuffers::FlatBufferBuilder builder; + try + { + auto offset = hipdnn_flatbuffers_sdk::json::to( + builder, graphJson); + builder.Finish(offset); + } + catch(const std::exception&) + { + return LoadError::INVALID_GRAPH_SCHEMA; + } + + // 3. Capture the graph and derive the output UIDs (always available, even + // for a graph-only bundle). + IntegrationTestBundle bundle; + bundle.graphBuffer = builder.Release(); + bundle.outputTensorUids = hipdnn_test_sdk::utilities::getOutputTensorUidsFromGraph(graphJson); + + // 4. Metadata is mandatory ONLY for golden bundles — those shipping output + // .bin blobs. Metadata (arch lock, provenance, seed) exists to validate + // golden data; a bundle with no golden outputs (pure graph-only, or + // inputs-only verified against a reference) has nothing for it to + // validate, so absent metadata is fine and we default-construct it. + // + // loadBundleMetadata returns nullopt both when the .meta.json is absent + // and when it is present but invalid (bad JSON / bad format_version). For + // a golden bundle either case is an authoring error -> FAIL. + const bool goldenOutputsPresent = !bundle.outputTensorUids.empty() + && detail::blobsPresentFor(bundle.outputTensorUids, jsonPath); + + auto metadata = hipdnn_test_sdk::utilities::loadBundleMetadata(jsonPath); + if(!metadata.has_value()) + { + if(goldenOutputsPresent) + { + return LoadError::MISSING_METADATA; + } + metadata.emplace(); // graph-only / no-golden: empty metadata is valid. + } + bundle.metadata = std::move(*metadata); + + // 5. Load tensor .bin data, inputs and outputs INDEPENDENTLY. + // + // Output uids are the graph's outputs; every other declared tensor is an + // input. We load inputs only if all input blobs are present, and outputs + // (golden) only if all output blobs are present: + // + // * all input blobs present -> tensors gets the inputs + // * all output blobs present -> tensors also gets the golden outputs and + // hasGoldenOutputs = true + // * input blobs absent -> tensors stays nullopt (tier-3: harness + // may synthesize inputs, else SKIP) + // + // A blob that is present but fails to load (wrong size, unreadable, + // unsupported dtype) throws inside tensorFromFileAndAttributes; we catch it + // and classify it as TENSOR_LOAD_FAILED so the loader stays total. + { + const std::vector allUids = detail::allTensorUids(graphJson); + + const std::set outputUidSet(bundle.outputTensorUids.begin(), + bundle.outputTensorUids.end()); + std::vector inputUids; + for(const int64_t uid : allUids) + { + if(outputUidSet.count(uid) == 0) + { + inputUids.push_back(uid); + } + } + + // A graph with no declared inputs cannot be fed; treat as graph-only. + const bool inputsPresent + = !inputUids.empty() && detail::blobsPresentFor(inputUids, jsonPath); + const bool outputsPresent = goldenOutputsPresent; // computed in step 4 + + if(inputsPresent) + { + const auto& graph + = *hipdnn_flatbuffers_sdk::data_objects::GetGraph(bundle.graphBuffer.data()); + + // uid -> attributes, so we can load a chosen subset of tensors. + std::unordered_map + attrByUid; + for(const auto* attributes : *graph.tensors()) + { + attrByUid[attributes->uid()] = attributes; + } + + const auto loadUids = [&](const std::vector& uids, TensorMap& into) { + for(const int64_t uid : uids) + { + const auto it = attrByUid.find(uid); + if(it == attrByUid.end()) + { + continue; + } + into[uid] = hipdnn_test_sdk::utilities::tensorFromFileAndAttributes( + detail::tensorBlobPath(jsonPath, uid), *it->second); + } + }; + + try + { + TensorMap tensorMap; + loadUids(inputUids, tensorMap); + if(outputsPresent) + { + loadUids(bundle.outputTensorUids, tensorMap); + bundle.hasGoldenOutputs = true; + } + bundle.tensors = std::move(tensorMap); + } + catch(const std::exception&) + { + return LoadError::TENSOR_LOAD_FAILED; + } + } + } + + return bundle; +} + +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/bundle/UnverifiableBundleReport.hpp b/dnn-providers/integration-tests/src/harness/bundle/UnverifiableBundleReport.hpp new file mode 100644 index 000000000000..5ebe1902ba5b --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/bundle/UnverifiableBundleReport.hpp @@ -0,0 +1,125 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include + +namespace hipdnn_integration_tests::bundle +{ + +// Why a bundle could not be verified. The two severities are printed in separate +// sections so a genuine reference bug is never lost among expected coverage gaps. +// +// Unverifiable — expected coverage gap (no golden data, no reference can run +// the op, inputs could not be synthesized, ...). The engine was +// not accused; we simply had no oracle. Quiet but listed. +// RefError — a reference executor that CAN run the op threw at runtime +// (case C) and the harness fell through to keep verifying the +// engine. This is a reference bug and must be loud. +enum class UnverifiableSeverity +{ + UNVERIFIABLE, + REF_ERROR, +}; + +// Process-wide collector of bundles that ended a run without a verdict. Mirrors +// SupportMatrixCollector: a thread-safe singleton populated during test execution +// and printed once after RUN_ALL_TESTS(). Records do not affect the GTest exit +// code — an unverifiable bundle SKIPs; this report is the visible trail. +class UnverifiableBundleReport +{ +public: + struct Record + { + std::string bundle; // bundle path / identifier + std::string reason; // human-readable explanation + UnverifiableSeverity severity; + }; + + static UnverifiableBundleReport& get() + { + static UnverifiableBundleReport s_instance; + return s_instance; + } + + UnverifiableBundleReport(const UnverifiableBundleReport&) = delete; + UnverifiableBundleReport& operator=(const UnverifiableBundleReport&) = delete; + UnverifiableBundleReport(UnverifiableBundleReport&&) = delete; + UnverifiableBundleReport& operator=(UnverifiableBundleReport&&) = delete; + + void record(std::string bundle, std::string reason, UnverifiableSeverity severity) + { + const std::lock_guard lock(_mutex); + _records.push_back({std::move(bundle), std::move(reason), severity}); + } + + std::vector getRecords() const + { + const std::lock_guard lock(_mutex); + return _records; + } + + void reset() + { + const std::lock_guard lock(_mutex); + _records.clear(); + } + + // Print both severity sections to `os`. No-op when nothing was recorded. + void print(std::ostream& os = std::cout) const + { + std::vector records; + { + const std::lock_guard lock(_mutex); + records = _records; + } + if(records.empty()) + { + return; + } + + printSection(os, records, UnverifiableSeverity::REF_ERROR, "REFERENCE EXECUTOR ERRORS"); + printSection(os, records, UnverifiableSeverity::UNVERIFIABLE, "UNVERIFIABLE BUNDLES"); + } + +private: + UnverifiableBundleReport() = default; + + static void printSection(std::ostream& os, + const std::vector& records, + UnverifiableSeverity severity, + const char* heading) + { + size_t count = 0; + for(const auto& r : records) + { + if(r.severity == severity) + { + ++count; + } + } + if(count == 0) + { + return; + } + + os << "\n==== " << heading << " (" << count << ") ====\n"; + for(const auto& r : records) + { + if(r.severity == severity) + { + os << " - " << r.bundle << ": " << r.reason << "\n"; + } + } + } + + mutable std::mutex _mutex; + std::vector _records; +}; + +} // namespace hipdnn_integration_tests::bundle diff --git a/dnn-providers/integration-tests/src/harness/golden/IntegrationGraphGoldenReferenceVerificationHarness.hpp b/dnn-providers/integration-tests/src/harness/golden/IntegrationGraphGoldenReferenceVerificationHarness.hpp deleted file mode 100644 index db50296cacc3..000000000000 --- a/dnn-providers/integration-tests/src/harness/golden/IntegrationGraphGoldenReferenceVerificationHarness.hpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "harness/SharedHandle.hpp" -#include "harness/TestConfig.hpp" -#include "harness/golden/BundleDiscovery.hpp" -#include "harness/golden/IntegrationTestBundle.hpp" - -namespace hipdnn_integration_tests::golden -{ - -// Saved expected output tensors, keyed by output tensor UID. Extracted from a -// loaded bundle's output tensors just before execution: the harness keeps these -// as the golden reference and zeroes the live tensors so the runner computes -// into clean buffers. -using GoldenOutputs - = std::unordered_map>; - -class IntegrationGraphGoldenReferenceVerificationHarness : public ::testing::Test -{ -public: - explicit IntegrationGraphGoldenReferenceVerificationHarness(bool requiresDevice) - : _requiresDevice(requiresDevice) - { - } - - // The bundle is loaded once at registration time and shared into the test's - // factory; the harness does not load from disk. The path is kept only for - // diagnostic messages. - void setBundle(std::shared_ptr bundle, std::filesystem::path path) - { - _bundle = std::move(bundle); - _bundlePath = std::move(path); - } - -protected: - // NOLINTNEXTLINE(readability-identifier-naming) - void SetUp() override - { - if(_requiresDevice) - { - SKIP_IF_NO_DEVICES(); - } - - if(_bundle == nullptr) - { - GTEST_SKIP() << "No bundle set"; - } - - // A graph-only bundle (no tensor data on disk, or .bin not pulled via - // DVC) cannot be executed or compared -> SKIP. - if(!_bundle->tensors.has_value()) - { - GTEST_SKIP() << "Tensor data not available (graph-only bundle or DVC not pulled?): " - << _bundlePath; - } - - applyMetadataGuards(); - } - - // Save each output tensor's loaded data as the golden reference, then zero - // the live tensor so the runner computes into a clean buffer. Returns the - // golden map keyed by output UID. - GoldenOutputs extractGolden(TensorMap& tensorMap) const - { - GoldenOutputs golden; - const auto wrapper = _bundle->graphWrapper(); - const auto& tensorAttrMap = wrapper.getTensorMap(); - - for(const int64_t uid : _bundle->outputTensorUids) - { - const auto dataType = tensorAttrMap.at(uid)->data_type(); - auto& livePtr = tensorMap.at(uid); - - auto zeroed = std::visit( - [&](auto nativeType) { - using DataType = decltype(nativeType); - auto tensorPtr = std::unique_ptr( - new hipdnn_data_sdk::utilities::Tensor(livePtr->dims(), - livePtr->strides())); - tensorPtr->fillTensorWithValue(0.f); - return tensorPtr; - }, - hipdnn_test_sdk::utilities::datatypeToNativeVariant(dataType)); - - std::swap(zeroed, livePtr); // live map now holds the zero buffer - golden[uid] = std::move(zeroed); // golden holds the original data - } - return golden; - } - - // NOLINTNEXTLINE(readability-identifier-naming) - void TestBody() override - { - runGoldenComparison(); - } - - // Builds the graph from its serialized bytes, selects an engine (honouring - // an explicit --engine if given), builds plans, and executes into the - // variant pack. "Unsupported graph" is signalled by throwing (the harness - // translates that into a SKIP). Genuine build/execute errors use ASSERT_*. - virtual void executeGraphThroughEngine(std::unordered_map& variantPack) - { - auto handle = getSharedHandle(); - - const std::vector graphBytes( - _bundle->graphBuffer.data(), _bundle->graphBuffer.data() + _bundle->graphBuffer.size()); - - hipdnn_frontend::graph::Graph graph; - auto err = graph.from_binary(handle, graphBytes); - ASSERT_TRUE(err.is_good()) << "from_binary failed: " << err.get_message(); - - std::vector engineIds; - auto status = graph.get_ranked_engine_ids(engineIds); - - const auto graphSummary = [&] { - return std::to_string(_bundle->outputTensorUids.size()) + " output tensor(s), " - + std::to_string(engineIds.size()) + " ranked engine(s)"; - }; - - if(TestConfig::get().hasEngineName()) - { - int64_t targetEngineId = TestConfig::get().getEngineId(); - if(status.is_bad() - || std::find(engineIds.begin(), engineIds.end(), targetEngineId) == engineIds.end()) - { - throw std::runtime_error("Engine " + std::string(TestConfig::get().getEngineName()) - + " does not support this graph (" + graphSummary() + ")"); - } - graph.set_preferred_engine_id_ext(targetEngineId); - } - else - { - if(status.is_bad() || engineIds.empty()) - { - throw std::runtime_error("No engine supports this graph (" + graphSummary() + ")"); - } - } - - auto result = graph.create_execution_plans(); - ASSERT_TRUE(result.is_good()) << result.get_message(); - result = graph.check_support(); - ASSERT_TRUE(result.is_good()) << result.get_message(); - result = graph.build_plans(); - ASSERT_TRUE(result.is_good()) << result.get_message(); - - int64_t workspaceSize = 0; - result = graph.get_workspace_size(workspaceSize); - ASSERT_TRUE(result.is_good()) << result.get_message(); - ASSERT_GE(workspaceSize, 0); - const hipdnn_data_sdk::utilities::Workspace workspace(static_cast(workspaceSize)); - - result = graph.execute(handle, variantPack, workspace.get()); - ASSERT_TRUE(result.is_good()) << result.get_message(); - } - -private: - bool _requiresDevice; - std::filesystem::path _bundlePath; - std::shared_ptr _bundle; - - void runGoldenComparison() - { - auto& tensorMap = *_bundle->tensors; - - if(_bundle->outputTensorUids.empty()) - { - GTEST_SKIP() << "Bundle has no output tensors to compare: " << _bundlePath; - } - - const auto golden = extractGolden(tensorMap); - - // Build the variant pack from the tensor map. Device tests use GPU - // pointers (rawDeviceData); CPU-only unit tests use host pointers so - // they can run on CI without a GPU. - std::unordered_map variantPack; - for(auto& [uid, tensor] : tensorMap) - { - variantPack[uid] = _requiresDevice ? tensor->rawDeviceData() : tensor->rawHostData(); - } - - // executeGraphThroughEngine signals "unsupported graph" by throwing; - // the harness translates that into a SKIP. ASSERT_NO_FATAL_FAILURE - // still wraps the call so that a genuine GTest assertion inside the - // executor FAILs rather than falling through to the comparison. - bool executorThrew = false; - std::string executorError; - try - { - ASSERT_NO_FATAL_FAILURE(executeGraphThroughEngine(variantPack)); - } - catch(const std::exception& e) - { - executorThrew = true; - executorError = e.what(); - } - - if(executorThrew) - { - GTEST_SKIP() << "Executor could not run bundle " << _bundlePath << ": " - << executorError; - } - - for(auto uid : _bundle->outputTensorUids) - { - if(_requiresDevice) - { - tensorMap.at(uid)->markDeviceModified(); - } - else - { - tensorMap.at(uid)->markHostModified(); - } - } - - auto wrapper = _bundle->graphWrapper(); - const auto& tensorAttrMap = wrapper.getTensorMap(); - - for(auto uid : _bundle->outputTensorUids) - { - auto& actualTensor = *tensorMap.at(uid); - auto& expectedTensor = *golden.at(uid); - - auto* attrs = tensorAttrMap.at(uid); - auto dataType = attrs->data_type(); - - float atol = 0.0f; - float rtol = 0.0f; - resolveTolerances(wrapper, dataType, atol, rtol); - - compareOutputTensor(uid, *attrs, dataType, expectedTensor, actualTensor, atol, rtol); - } - } - - // Compare one output tensor against its golden reference via the allClose - // validator (which covers both CPU and GPU validation paths). Only on failure - // do we compute and report the element-wise tensor diff for diagnostics. - void compareOutputTensor(int64_t uid, - const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType, - hipdnn_data_sdk::utilities::ITensor& expected, - hipdnn_data_sdk::utilities::ITensor& actual, - float atol, - float rtol) const - { - auto validator = hipdnn_test_sdk::utilities::createAllCloseValidator(dataType, atol, rtol); - const bool passed = validator->allClose(expected, actual); - - if(!passed) - { - std::ostringstream report; - report << reportHeader(uid, attrs, dataType, expected, atol, rtol); - appendTensorDiff(report, uid, attrs, dataType, expected, actual, atol, rtol); - EXPECT_TRUE(false) << report.str(); - } - } - - // Appends an element-wise diff summary for FP types; non-FP types get a - // generic note (computeTensorDiff has no integer specialization). - static void - appendTensorDiff(std::ostream& os, - int64_t uid, - const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType, - hipdnn_data_sdk::utilities::ITensor& expected, - hipdnn_data_sdk::utilities::ITensor& actual, - float atol, - float rtol) - { - using DT = hipdnn_flatbuffers_sdk::data_objects::DataType; - using hipdnn_data_sdk::types::bfloat16; - using hipdnn_data_sdk::types::half; - - switch(dataType) - { - case DT::FLOAT: - appendFpDiff(os, uid, attrs, expected, actual, atol, rtol); - return; - case DT::HALF: - appendFpDiff(os, uid, attrs, expected, actual, atol, rtol); - return; - case DT::BFLOAT16: - appendFpDiff(os, uid, attrs, expected, actual, atol, rtol); - return; - case DT::DOUBLE: - appendFpDiff(os, uid, attrs, expected, actual, atol, rtol); - return; - default: - os << " (no element-wise diff available for this data type)\n"; - } - } - - template - static void appendFpDiff(std::ostream& os, - int64_t uid, - const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, - hipdnn_data_sdk::utilities::ITensor& expected, - hipdnn_data_sdk::utilities::ITensor& actual, - float atol, - float rtol) - { - const auto summary - = hipdnn_test_sdk::utilities::computeTensorDiff(expected, actual, atol, rtol); - hipdnn_test_sdk::utilities::printTensorDiffSummary(os, labelFor(uid, attrs), summary); - } - - // The human-readable label for an output tensor: its name if it has one, - // otherwise "uid=N". - static std::string labelFor(int64_t uid, - const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs) - { - const auto* name = attrs.name(); - return (name != nullptr && !name->empty()) ? name->str() : ("uid=" + std::to_string(uid)); - } - - // Common header for a failed comparison (RFC 0011 §4.3 "What a failure looks - // like"): bundle path, tensor UID/name, shape + dtype, and tolerance. The - // per-element diff (worst index, expected/actual/abs-diff, mismatch count) is - // appended by the caller from the TensorDiffSummary it already computed. - std::string reportHeader(int64_t uid, - const hipdnn_flatbuffers_sdk::data_objects::TensorAttributes& attrs, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType, - hipdnn_data_sdk::utilities::ITensor& expected, - float atol, - float rtol) const - { - std::ostringstream os; - os << "\nGolden comparison FAILED\n" - << " Bundle: " << _bundlePath << "\n" - << " Tensor: " << labelFor(uid, attrs) << " (UID " << uid << ", output)\n" - << " Shape: " << hipdnn_test_sdk::utilities::StreamVec(expected.dims()) << " " - << dataTypeName(dataType) << "\n" - << " Tolerance: atol=" << atol << " rtol=" << rtol << "\n"; - return os.str(); - } - - static std::string dataTypeName(hipdnn_flatbuffers_sdk::data_objects::DataType dataType) - { - return hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(dataType); - } - - static void - resolveTolerances(const hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper& wrapper, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType, - float& atol, - float& rtol) - { - const float defaultTolerance = deriveDefaultTolerance(wrapper, dataType); - atol = defaultTolerance; - rtol = defaultTolerance; - } - - template - static float - toleranceForNodeAttributes(hipdnn_flatbuffers_sdk::data_objects::NodeAttributes attrType) - { - using NA = hipdnn_flatbuffers_sdk::data_objects::NodeAttributes; - namespace tol = hipdnn_test_sdk::utilities; - - switch(attrType) - { - case NA::ConvolutionFwdAttributes: - return tol::conv::getToleranceFwd(); - case NA::ConvolutionBwdAttributes: - return tol::conv::getToleranceBwd(); - case NA::ConvolutionWrwAttributes: - return tol::conv::getToleranceWrw(); - case NA::BatchnormInferenceAttributes: - return tol::batchnorm::getToleranceInference(); - case NA::BatchnormInferenceAttributesVarianceExt: - return tol::batchnorm::getToleranceInferenceWithVariance(); - case NA::BatchnormAttributes: - return tol::batchnorm::getToleranceTraining(); - case NA::BatchnormBackwardAttributes: - return tol::batchnorm::getToleranceBackward(); - case NA::MatmulAttributes: - return tol::matmul::getTolerance(); - case NA::ReductionAttributes: - return tol::reduction::getTolerance(); - case NA::RMSNormAttributes: - return tol::rmsnorm::getTolerance(); - case NA::PointwiseAttributes: - return tol::pointwise::getTolerance(); - case NA::LayernormAttributes: - return tol::layernorm::getTolerance(); - default: - return 1e-3f; - } - } - - // A bundle graph may fuse several ops (e.g. Convolution + Pointwise - // activation). Each op type has its own numerical tolerance, so the only - // tolerance that holds for the fused output is the loosest one across all - // nodes: a tolerance tight enough for Conv (e.g. 1e-3) would wrongly fail an - // activation output that legitimately needs 1e-2. We therefore take the max - // tolerance over every node rather than picking a single "root" node. - static float deriveDefaultTolerance( - const hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper& wrapper, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType) - { - const auto nodeCount = wrapper.nodeCount(); - - bool found = false; - float maxTolerance = 0.0f; - for(uint32_t i = 0; i < nodeCount; ++i) - { - const auto attrType = wrapper.getNode(i).attributes_type(); - const float nodeTolerance = toleranceForDataType(attrType, dataType); - maxTolerance = found ? std::max(maxTolerance, nodeTolerance) : nodeTolerance; - found = true; - } - - return found ? maxTolerance : 1e-3f; - } - - // Dispatch a single node's tolerance lookup on the bundle's data type. - static float toleranceForDataType(hipdnn_flatbuffers_sdk::data_objects::NodeAttributes attrType, - hipdnn_flatbuffers_sdk::data_objects::DataType dataType) - { - using DT = hipdnn_flatbuffers_sdk::data_objects::DataType; - using hipdnn_data_sdk::types::bfloat16; - using hipdnn_data_sdk::types::half; - - switch(dataType) - { - case DT::FLOAT: - return toleranceForNodeAttributes(attrType); - case DT::HALF: - return toleranceForNodeAttributes(attrType); - case DT::BFLOAT16: - return toleranceForNodeAttributes(attrType); - default: - return 1e-3f; - } - } - - void applyMetadataGuards() const - { - // metadata is mandatory, so a loaded bundle always has it (a bundle with - // no .meta.json fails to load and never reaches here). Individual fields - // (VRAM, arch) are still optional within BundleMetadata; the guards below - // no-op when their field is absent, so they can be called unconditionally. - if(auto reason = hipdnn_test_sdk::utilities::checkVramRequirement( - _bundle->metadata, TestConfig::get().getCurrentDeviceVramMb())) - { - GTEST_SKIP() << *reason; - } - - if(auto reason = hipdnn_test_sdk::utilities::checkArchCompatibility( - _bundle->metadata, TestConfig::get().getCurrentArch())) - { - GTEST_SKIP() << *reason; - } - } -}; - -} // namespace hipdnn_integration_tests::golden diff --git a/dnn-providers/integration-tests/src/harness/golden/IntegrationTestBundle.hpp b/dnn-providers/integration-tests/src/harness/golden/IntegrationTestBundle.hpp deleted file mode 100644 index a05f33612437..000000000000 --- a/dnn-providers/integration-tests/src/harness/golden/IntegrationTestBundle.hpp +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace hipdnn_integration_tests::golden -{ - -// Loaded tensors keyed by tensor UID. Holds every tensor declared by the graph — -// inputs carry their data, output tensors carry their expected (golden) values as -// loaded from the .bin blobs. The harness saves the outputs as golden and zeroes -// them before execution. -using TensorMap = std::unordered_map>; - -// One test's worth of bundle data loaded from disk. -// -// graphBuffer — the parsed graph, as a flatbuffer. Always present in a -// loaded bundle; the engine deserializes it (from_binary) -// and the harness walks it (GraphWrapper) for dtypes and -// tolerances. A bundle that cannot even produce a graph is a -// LoadError, not a bundle. -// metadata — .meta.json contents (VRAM / arch guards). MANDATORY: a -// bundle without a valid .meta.json is a LoadError, so a -// loaded bundle always carries real metadata. -// outputTensorUids — UIDs of the graph's output tensors, derived from the -// graph. Always available (even for a graph-only bundle), -// so the harness knows which tensors to compare / allocate. -// tensors — the loaded tensor data. Absent (nullopt) for a graph-only -// bundle (no "tensors" in the graph, or .bin data not pulled -// via DVC); such a bundle cannot be executed/compared and -// the harness SKIPs it. -struct IntegrationTestBundle -{ - flatbuffers::DetachedBuffer graphBuffer; - hipdnn_test_sdk::utilities::BundleMetadata metadata; - std::vector outputTensorUids; - std::optional tensors; - - // View over the graph flatbuffer, valid as long as this bundle lives. - hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper graphWrapper() const - { - return hipdnn_flatbuffers_sdk::flatbuffer_utilities::GraphWrapper{graphBuffer.data(), - graphBuffer.size()}; - } -}; - -// Why a load did NOT produce a bundle. These are the FAIL outcomes — an authoring -// error in the bundle itself. (A bundle that loads but lacks tensor data is a -// successfully-loaded graph-only bundle, not a LoadError; the harness SKIPs it.) -enum class LoadError -{ - MALFORMED_JSON, // the graph .json is not syntactically valid JSON - INVALID_GRAPH_SCHEMA, // valid JSON, but not a valid graph (cannot build flatbuffer) - MISSING_METADATA, // required .meta.json companion is absent or invalid - TENSOR_LOAD_FAILED // a tensor .bin is present but failed to load (wrong size, - // unreadable, unsupported dtype, ...) -}; - -// A load either yields a bundle or explains why it could not. std::visit at the -// call site forces both cases to be handled. -using LoadResult = std::variant; - -inline const char* toString(LoadError error) -{ - switch(error) - { - case LoadError::MALFORMED_JSON: - return "graph JSON is not parseable"; - case LoadError::INVALID_GRAPH_SCHEMA: - return "graph JSON is not a valid graph"; - case LoadError::MISSING_METADATA: - return "missing or invalid .meta.json companion"; - case LoadError::TENSOR_LOAD_FAILED: - return "tensor .bin present but failed to load"; - default: - return "unknown load error"; - } -} - -namespace detail -{ - -// True iff every tensor declared in the graph has its companion .bin blob on -// disk. The blob path is "{stem}.tensor{uid}.bin", matching the loader's own -// derivation. A graph with no "tensors" array is graph-only -> returns false. -inline bool allTensorBlobsPresent(const nlohmann::json& graphJson, - const std::filesystem::path& jsonPath) -{ - if(!graphJson.contains("tensors") || !graphJson.at("tensors").is_array() - || graphJson.at("tensors").empty()) - { - return false; - } - - auto basePath = jsonPath; - basePath.replace_extension(); - for(const auto& tensor : graphJson.at("tensors")) - { - if(!tensor.contains("uid")) - { - return false; - } - const auto uid = tensor.at("uid").get(); - const auto binPath - = std::filesystem::path(basePath.string() + ".tensor" + std::to_string(uid) + ".bin"); - if(!std::filesystem::exists(binPath)) - { - return false; - } - } - return true; -} - -} // namespace detail - -// Load a bundle from its graph .json path, classifying the outcome. -// -// This deliberately does NOT call test_sdk's loadGraphAndTensors(), whose -// all-or-nothing contract ("graph AND at least one tensor, or throw") conflicts -// with our design where a graph-only bundle is legitimate. Instead it composes -// the same test_sdk primitives (json -> flatbuffer graph, per-tensor blob load) -// under our own policy: -// -// * graph .json not parseable -> LoadError::MALFORMED_JSON (FAIL) -// * parseable but not a valid graph -> LoadError::INVALID_GRAPH_SCHEMA(FAIL) -// * valid graph, no .meta.json companion-> LoadError::MISSING_METADATA (FAIL) -// * valid graph, tensor .bin data absent-> bundle with tensors == nullopt (SKIP) -// * valid graph, .bin present but broken-> LoadError::TENSOR_LOAD_FAILED (FAIL) -// * valid graph, all .bin present -> fully loaded bundle (RUN) -// -// The function is total: it never lets an exception escape. Every outcome is -// either a loaded bundle or a classified LoadError. -inline LoadResult loadIntegrationTestBundle(const std::filesystem::path& jsonPath) -{ - // 1. Read and parse the graph .json. Unreadable or unparseable -> FAIL. - std::ifstream stream(jsonPath); - if(!stream) - { - return LoadError::MALFORMED_JSON; - } - - const auto graphJson = nlohmann::json::parse(stream, nullptr, /*allow_exceptions=*/false); - if(graphJson.is_discarded()) - { - return LoadError::MALFORMED_JSON; - } - - // 2. Verify the graph by building the flatbuffer. A structurally invalid - // graph throws -> INVALID_GRAPH_SCHEMA. - flatbuffers::FlatBufferBuilder builder; - try - { - auto offset = hipdnn_flatbuffers_sdk::json::to( - builder, graphJson); - builder.Finish(offset); - } - catch(const std::exception&) - { - return LoadError::INVALID_GRAPH_SCHEMA; - } - - // 3. Metadata is MANDATORY: every valid-graph bundle must ship a valid - // .meta.json companion. loadBundleMetadata returns nullopt both when the - // file is absent and when it is present but invalid (bad JSON / bad - // format_version) — either way it is an authoring error -> FAIL. - auto metadata = hipdnn_test_sdk::utilities::loadBundleMetadata(jsonPath); - if(!metadata.has_value()) - { - return LoadError::MISSING_METADATA; - } - - // 4. Graph + metadata verified: capture them and the output UIDs (always - // available, even for a graph-only bundle). - IntegrationTestBundle bundle; - bundle.graphBuffer = builder.Release(); - bundle.metadata = std::move(*metadata); - bundle.outputTensorUids = hipdnn_test_sdk::utilities::getOutputTensorUidsFromGraph(graphJson); - - // 5. Load tensor .bin data if every blob is present; otherwise leave - // tensors == nullopt (graph-only bundle -> harness SKIPs). A blob that is - // present but fails to load (wrong size, unreadable, unsupported dtype) - // throws inside tensorFromFileAndAttributes; we catch it here and classify - // it as TENSOR_LOAD_FAILED so the loader is total (every outcome is either - // a bundle or a named LoadError, never a raw escaping exception). - if(detail::allTensorBlobsPresent(graphJson, jsonPath)) - { - const auto& graph - = *hipdnn_flatbuffers_sdk::data_objects::GetGraph(bundle.graphBuffer.data()); - auto basePath = jsonPath; - basePath.replace_extension(); - - try - { - TensorMap tensorMap; - for(const auto* attributes : *graph.tensors()) - { - const auto tensorPath - = basePath.string() + ".tensor" + std::to_string(attributes->uid()) + ".bin"; - tensorMap[attributes->uid()] - = hipdnn_test_sdk::utilities::tensorFromFileAndAttributes(tensorPath, - *attributes); - } - bundle.tensors = std::move(tensorMap); - } - catch(const std::exception&) - { - return LoadError::TENSOR_LOAD_FAILED; - } - } - - return bundle; -} - -} // namespace hipdnn_integration_tests::golden diff --git a/dnn-providers/integration-tests/src/harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp b/dnn-providers/integration-tests/src/harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp index 671dcb248d19..ab5aea01933f 100644 --- a/dnn-providers/integration-tests/src/harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp +++ b/dnn-providers/integration-tests/src/harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp @@ -8,6 +8,7 @@ #include "detail/GpuPlanBuilderRegistry.hpp" #include "harness/IReferenceGraphExecutor.hpp" +#include "harness/ReferenceCapabilityError.hpp" namespace hipdnn_integration_tests::gpu_graph_executor { @@ -83,8 +84,8 @@ class GpuReferenceGraphExecutor : public IReferenceGraphExecutor { const std::string nodeName = node.name() == nullptr ? " unknown" : " " + node.name()->str(); - throw std::runtime_error("GPU plan builder is not applicable for the given node:" - + nodeName); + throw ReferenceCapabilityError("GPU plan builder is not applicable for the given node:" + + nodeName); } return planBuilder.buildNodePlan(graph, node); @@ -123,15 +124,17 @@ class GpuReferenceGraphExecutor : public IReferenceGraphExecutor case NodeAttrs::BlockScaleQuantizeAttributes: { const std::string nodeName = node.name() == nullptr ? "unknown" : node.name()->str(); - throw std::runtime_error("GPU plan not yet implemented for node '" + nodeName - + "'. Register a GPU plan for this operation type."); + throw ReferenceCapabilityError("GPU plan not yet implemented for node '" + nodeName + + "'. Register a GPU plan for this operation type."); } case NodeAttrs::CustomOpAttributes: - throw std::runtime_error("GPU reference executor does not support custom operations"); + throw ReferenceCapabilityError( + "GPU reference executor does not support custom operations"); default: - throw std::runtime_error("Unsupported node type for GPU signature key generation"); + throw ReferenceCapabilityError( + "Unsupported node type for GPU signature key generation"); } } diff --git a/dnn-providers/integration-tests/src/harness/input_init/SynthesisTracker.hpp b/dnn-providers/integration-tests/src/harness/input_init/SynthesisTracker.hpp new file mode 100644 index 000000000000..3a5ca814438e --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/input_init/SynthesisTracker.hpp @@ -0,0 +1,230 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace hipdnn_integration_tests +{ + +// Pre-allocated input tensors keyed by uid, handed to a fill function to populate. +using InputTensorMap + = std::unordered_map>; + +// Result of a synthesis step — returned by per-node fill functions and by +// tracker.finish(). filled==true means synthesis can proceed; filled==false +// means at least one input could not be synthesized — reason says which and why. +struct SynthesisResult +{ + bool filled = false; + std::string reason; + + static SynthesisResult ok() + { + return {true, {}}; + } + static SynthesisResult unsupported(std::string why) + { + return {false, std::move(why)}; + } +}; + +// Tracks which leaf inputs of a bundle's graph have been accounted for by the +// per-node fill functions. A bundle contains a graph of one or more nodes — a +// single conv, or a fused chain like conv → bias_add → relu. One tracker is +// created for the entire graph's leaf inputs (non-virtual, non-output tensors), +// shared across all fill functions, and finish() is called once at the end. +// +// Graph structure (conv + bias + relu fused graph): +// +// Data flows top-down. Roots are the leaf input tensors that the tracker +// owns; the sink is the graph output tensor. +// +// x (root/leaf) w (root/leaf) bias (root/leaf) +// uid=1 uid=2 uid=4 +// \ / | +// \ / | +// ┌──────────────┐ | +// │ ConvFwd │ (internal) | +// └──────┬───────┘ | +// | | +// conv_y (virtual, uid=10) | +// | | +// \ / +// ┌──────────────────────┐ +// │ Pointwise ADD │ (internal) +// └──────────┬───────────┘ +// | +// bias_out (virtual, uid=11) +// | +// ┌──────────┴───────────┐ +// │ Pointwise RELU │ (internal) +// └──────────┬───────────┘ +// | +// out (sink/leaf, uid=6) +// +// Roots = leaf input tensors, owned by tracker: {1, 2, 4} +// Virtual = inter-node edges, not owned → fillFree/markDerived skip them +// Sink = graph output tensor, not owned +// +// Each leaf input must be declared as one of three mutually exclusive roles: +// +// FREE — random values in a range work. The range can be tight (e.g. +// variance in [0.5, 1.5] to stay positive) or wide (e.g. x in +// [-1, 1]). What matters is that any value in the range is valid. +// +// STRUCTURED — random values in any range won't work. The data needs to be +// consistent with other state or follow a specific format. +// +// Example 1: dropout seeds — forward and backward must use the +// same seed so they generate the same drop pattern. A randomly +// synthesized seed for a standalone backward won't match any +// forward pass, producing wrong gradients. +// +// Example 2: page table indices (paged attention) — when serving +// multiple users, each user's K and V data grows at different +// rates. Instead of pre-allocating a large contiguous block per +// user, GPU memory is pooled into equal-size chunks handed out +// on demand. A user's data ends up scattered across +// non-contiguous chunks. The page table tensor holds chunk +// indices telling the kernel where each user's data lives. +// Randomly generated indices would not correspond to valid +// allocated chunks, producing incorrect reads or crashes. +// +// Example 3: peer_stats (multi-GPU batchnorm) — when a batch +// is split across multiple GPUs, each GPU computes local +// statistics (mean, variance) for its chunk. To produce +// correct global statistics, each GPU must read the others' +// partial results. The peer_stats tensor holds references to +// other GPUs' memory regions. Randomly generated values would +// point to invalid cross-device memory. +// +// DERIVED — the value must come from another op's output, not from random +// generation. In a fused fwd+bwd graph the forward output flows +// to the backward input as a virtual tensor (not owned, silently +// skipped). In a standalone backward, the same tensor is a leaf +// input. markDerived records it and lets finish() refuse (SKIP), +// used when no recipe exists to produce a consistent value. +// +// finish() succeeds only when every owned leaf input was declared as some role +// AND none were STRUCTURED or DERIVED. Undeclared inputs and refused inputs both +// produce a diagnostic message so the caller knows what went wrong. +// +// PRECONDITION — a validated, well-formed graph. The tracker trusts the leaf +// set it is handed and the virtual_ flag on every tensor: +// +// * A required input referenced by a node is assumed to be a real leaf tensor +// (not mislabeled virtual or aliased to an output). If it were, fillFree +// would silently no-op on a non-owned uid and finish() would never see it. +// * A virtual tensor is assumed to genuinely have a producer node. A standalone +// backward whose `o`/`stats` were erroneously flagged virtual would skip the +// markDerived refusal and "succeed" with garbage. +// +// Both of those malformed-graph states are rejected upstream — at bundle load +// (the flatbuffer build in loadIntegrationTestBundle) and again by the engine's +// own graph validation (from_binary / check_support / build_plans), which +// requires every virtual tensor to have a producer. By the time synthesis runs, +// the graph is well-formed, so the tracker does not re-validate topology. +class SynthesisTracker +{ +public: + SynthesisTracker(const std::vector& ownedLeafInputUids, InputTensorMap& inputs) + : _inputs(inputs) + , _owned(ownedLeafInputUids.begin(), ownedLeafInputUids.end()) + { + } + + // Declares `uid` as FREE — fills it with random values in [lo, hi] and accounts for it. + void fillFree(int64_t uid, float lo, float hi, std::mt19937& rng) + { + if(!isOwned(uid)) + { + return; + } + const auto seed = static_cast(rng()); + _inputs.at(uid)->fillTensorWithRandomValues(lo, hi, seed); + _accounted.insert(uid); + } + + // Declares `uid` as STRUCTURED — accounts for it but records a refusal. + void markStructured(int64_t uid, const char* role) + { + if(!isOwned(uid)) + { + return; + } + _accounted.insert(uid); + _refusals.push_back(std::string(role) + " (structured input)"); + } + + // Declares `uid` as DERIVED — accounts for it but records a refusal. + void markDerived(int64_t uid, const char* role) + { + if(!isOwned(uid)) + { + return; + } + _accounted.insert(uid); + _refusals.push_back(std::string(role) + " (derived from another computation)"); + } + + // Returns ok() when all owned leaf inputs were filled with random data. + // Returns unsupported() when synthesis cannot produce valid data for + // this graph — either because a leaf input is STRUCTURED/DERIVED + // (we know about it but can't fill it), or because a leaf input was + // never declared by any node's fill function. + // Note: virtual inter-node tensors are not owned, so STRUCTURED/DERIVED + // calls on them are silently ignored. Absent optional tensors (uid 0 by + // hipdnn convention) are the caller's responsibility — fill functions + // should guard against calling fillFree/markStructured on uid 0 when the + // attribute means "not present." + SynthesisResult finish(const char* opName) const + { + std::vector reasons = _refusals; + for(const int64_t uid : _owned) + { + if(_accounted.count(uid) == 0) + { + reasons.push_back("tensor uid=" + std::to_string(uid) + + " (no role declared by initializer)"); + } + } + + if(reasons.empty()) + { + return SynthesisResult::ok(); + } + + std::ostringstream os; + os << opName << " inputs cannot be synthesized: "; + for(size_t i = 0; i < reasons.size(); ++i) + { + os << (i == 0 ? "" : ", ") << reasons[i]; + } + return SynthesisResult::unsupported(os.str()); + } + +private: + bool isOwned(int64_t uid) const + { + return _owned.count(uid) != 0; + } + + InputTensorMap& _inputs; // leaf inputs only (non-virtual, non-output tensors) + std::set _owned; + std::set _accounted; + std::vector _refusals; +}; + +} // namespace hipdnn_integration_tests diff --git a/dnn-providers/integration-tests/src/harness/input_init/SynthesizeInputs.hpp b/dnn-providers/integration-tests/src/harness/input_init/SynthesizeInputs.hpp new file mode 100644 index 000000000000..1094cb5c1806 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/input_init/SynthesizeInputs.hpp @@ -0,0 +1,495 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "harness/input_init/SynthesisTracker.hpp" + +namespace hipdnn_integration_tests +{ + +// ── Per-op fill functions ───────────────────────────────────────────────────── +// Each function declares inputs for one node in the graph. A single +// SynthesisTracker is shared across all nodes in the graph — the caller +// (synthesizeInputs in the harness .cpp) creates it with the whole-graph leaf +// input UIDs, passes it through each fill function, then calls finish() once +// after all nodes have been processed. This graph-level tracking is essential +// for fused/multi-node graphs: each node only accounts for its own UIDs, and +// the final finish() verifies that every leaf input was covered by some node. +// +// Every function follows the same pattern: +// 1. Cast the node to its concrete attribute type. +// 2. Declare each input as FREE (fill with random values), STRUCTURED (can't +// synthesize — needs specific format), or DERIVED (must come from another +// op's output). See SynthesisTracker.hpp for role definitions. +// 3. Return ok() if the attribute cast succeeded, or unsupported() if not. +// +// Fills must be deterministic given `rng` so re-running the same graph produces +// identical inputs for reproducible comparisons. +// +// To add a new op: copy fillConvFwdInputs (simplest example), adapt for your +// op's attributes, and add one case to the switch in synthesizeNodeInputs(). +// Function names follow the pattern fillInputs. + +// ── Convolution ─────────────────────────────────────────────────────────────── + +inline SynthesisResult fillConvFwdInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_ConvolutionFwdAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not ConvolutionFwdAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->w_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +inline SynthesisResult fillConvBwdDataInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_ConvolutionBwdAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not ConvolutionBwdAttributes"); + } + tracker.fillFree(a->dy_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->w_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +inline SynthesisResult + fillConvBwdWeightsInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_ConvolutionWrwAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not ConvolutionWrwAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->dy_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── Batchnorm ───────────────────────────────────────────────────────────────── + +inline SynthesisResult + fillBatchnormInferenceInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BatchnormInferenceAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BatchnormInferenceAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->mean_tensor_uid(), -0.1f, 0.1f, rng); + tracker.fillFree(a->inv_variance_tensor_uid(), 0.5f, 1.5f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->bias_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +inline SynthesisResult + fillBatchnormInferenceVarianceInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BatchnormInferenceAttributesVarianceExt(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BatchnormInferenceAttributesVarianceExt"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->mean_tensor_uid(), -1.0f, 1.0f, rng); + // Variance must be non-negative + tracker.fillFree(a->variance_tensor_uid(), 0.1f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->bias_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->epsilon_tensor_uid(), 0.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// peer_stats holds references to other GPUs' memory for multi-GPU batchnorm — +// randomly generated values would point to invalid cross-device memory. +inline SynthesisResult + fillBatchnormTrainingInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BatchnormAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BatchnormAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -2.0f, 2.0f, rng); + tracker.fillFree(a->bias_tensor_uid(), -2.0f, 2.0f, rng); + tracker.fillFree(a->epsilon_tensor_uid(), 0.0f, 1.0f, rng); + tracker.fillFree(a->prev_running_mean_tensor_uid().value_or(0), -2.0f, 2.0f, rng); + tracker.fillFree(a->prev_running_variance_tensor_uid().value_or(0), -2.0f, 2.0f, rng); + tracker.fillFree(a->momentum_tensor_uid().value_or(0), 0.0f, 1.0f, rng); + + if(a->peer_stats_tensor_uid() != nullptr) + { + for(const int64_t uid : *a->peer_stats_tensor_uid()) + { + tracker.markStructured(uid, "peer_stats"); + } + } + + return SynthesisResult::ok(); +} + +// mean/inv_variance are optional (may come from forward). peer_stats: see above. +inline SynthesisResult + fillBatchnormBackwardInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BatchnormBackwardAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BatchnormBackwardAttributes"); + } + tracker.fillFree(a->dy_tensor_uid(), -0.1f, 0.1f, rng); + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->mean_tensor_uid().value_or(0), -0.1f, 0.1f, rng); + tracker.fillFree(a->inv_variance_tensor_uid().value_or(0), 1.9f, 2.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -0.1f, 0.1f, rng); + + if(a->peer_stats_tensor_uid() != nullptr) + { + for(const int64_t uid : *a->peer_stats_tensor_uid()) + { + tracker.markStructured(uid, "peer_stats"); + } + } + + return SynthesisResult::ok(); +} + +// ── Matmul ──────────────────────────────────────────────────────────────────── + +inline SynthesisResult fillMatmulInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_MatmulAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not MatmulAttributes"); + } + tracker.fillFree(a->a_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->b_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── Pointwise ───────────────────────────────────────────────────────────────── + +inline SynthesisResult fillPointwiseInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_PointwiseAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not PointwiseAttributes"); + } + tracker.fillFree(a->in_0_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->in_1_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + tracker.fillFree(a->in_2_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + tracker.fillFree(a->axis_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── Reduction ───────────────────────────────────────────────────────────────── + +inline SynthesisResult fillReductionInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_ReductionAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not ReductionAttributes"); + } + tracker.fillFree(a->in_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── LayerNorm ───────────────────────────────────────────────────────────────── + +inline SynthesisResult fillLayernormInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_LayernormAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not LayernormAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->bias_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->epsilon_tensor_uid(), 0.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// mean and inv_variance are computed by the forward pass — a standalone backward +// can't produce correct gradients without them. +inline SynthesisResult + fillLayernormBackwardInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_LayernormBackwardAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not LayernormBackwardAttributes"); + } + tracker.fillFree(a->dy_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.markDerived(a->mean_tensor_uid().value_or(0), "mean (forward output)"); + tracker.markDerived(a->inv_variance_tensor_uid().value_or(0), "inv_variance (forward output)"); + tracker.fillFree(a->epsilon_tensor_uid().value_or(0), 0.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── RMSNorm ─────────────────────────────────────────────────────────────────── + +inline SynthesisResult fillRmsnormInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_RMSNormAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not RMSNormAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->epsilon_tensor_uid(), 0.0f, 1.0f, rng); + tracker.fillFree(a->bias_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// inv_rms is computed by the forward pass. +inline SynthesisResult + fillRmsnormBackwardInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_RMSNormBackwardAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not RMSNormBackwardAttributes"); + } + tracker.fillFree(a->dy_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid(), -1.0f, 1.0f, rng); + tracker.markDerived(a->inv_rms_tensor_uid(), "inv_rms (forward output)"); + return SynthesisResult::ok(); +} + +// ── Resample ────────────────────────────────────────────────────────────────── + +inline SynthesisResult fillResampleFwdInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_ResampleFwdAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not ResampleFwdAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── Block-scale quantization ────────────────────────────────────────────────── + +// Scale tensor holds per-block quantization factors that must match the +// quantized data — random scales would produce garbage dequantized values. +inline SynthesisResult + fillBlockScaleDequantizeInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BlockScaleDequantizeAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BlockScaleDequantizeAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + tracker.markStructured(a->scale_tensor_uid(), "scale (block quantization scales)"); + return SynthesisResult::ok(); +} + +inline SynthesisResult + fillBlockScaleQuantizeInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_BlockScaleQuantizeAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not BlockScaleQuantizeAttributes"); + } + tracker.fillFree(a->x_tensor_uid(), -1.0f, 1.0f, rng); + return SynthesisResult::ok(); +} + +// ── SDPA ────────────────────────────────────────────────────────────────────── + +// Q/K/V/mask accept random values, as does scale (the softmax multiplier, e.g. +// 1/sqrt(head_dim) — any positive value is mathematically valid). The FP8/MX +// descale/scale factors are STRUCTURED, NOT free: each must equal the actual +// quantization factor used to produce its tensor's data. A random descale does +// not break the engine-vs-reference comparison (both read the same shared value) +// but it lets values drift out of FP8 range and saturate identically on both +// sides — a vacuous pass that verifies nothing. We therefore refuse to fabricate +// them, mirroring fillBlockScaleDequantizeInputs. Real FP8 coverage comes from +// authored bundles that ship the matching scales as data. The remaining inputs +// are STRUCTURED for their own reasons: seq lengths encode actual sequence +// boundaries, page tables map to allocated GPU memory chunks, block masks define +// sparse attention patterns, and dropout seed/offset must match between fwd and +// bwd. Most of these are optional — absent ones (uid 0) are silently ignored. +inline SynthesisResult fillSdpaForwardInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_SdpaAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not SdpaAttributes"); + } + + tracker.fillFree(a->q_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->k_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->v_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->attn_mask_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid().value_or(0), 0.1f, 1.0f, rng); + + // FP8/MX quantization scale factors must match the data's true scale — see + // the header comment. Refuse rather than fabricate a meaningless value. + tracker.markStructured(a->descale_q_tensor_uid().value_or(0), "descale_q"); + tracker.markStructured(a->descale_k_tensor_uid().value_or(0), "descale_k"); + tracker.markStructured(a->descale_v_tensor_uid().value_or(0), "descale_v"); + tracker.markStructured(a->descale_s_tensor_uid().value_or(0), "descale_s"); + tracker.markStructured(a->scale_s_tensor_uid().value_or(0), "scale_s"); + tracker.markStructured(a->scale_o_tensor_uid().value_or(0), "scale_o"); + + tracker.markStructured(a->seq_len_q_tensor_uid().value_or(0), "seq_len_q"); + tracker.markStructured(a->seq_len_kv_tensor_uid().value_or(0), "seq_len_kv"); + tracker.markStructured(a->page_table_k_tensor_uid().value_or(0), "page_table_k"); + tracker.markStructured(a->page_table_v_tensor_uid().value_or(0), "page_table_v"); + tracker.markStructured(a->block_mask_tensor_uid().value_or(0), "block_mask"); + tracker.markStructured(a->seed_tensor_uid().value_or(0), "dropout_seed"); + tracker.markStructured(a->offset_tensor_uid().value_or(0), "dropout_offset"); + + return SynthesisResult::ok(); +} + +// Q/K/V/dO accept random values. O (the forward output) and stats (softmax +// statistics) are DERIVED — they must come from a forward pass to produce +// correct gradients. In a fused forward+backward graph these are virtual +// inter-node tensors (not owned, so silently skipped). A standalone backward +// without a forward is refused. +inline SynthesisResult + fillSdpaBackwardInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + const auto* a = node.attributes_as_SdpaBackwardAttributes(); + if(a == nullptr) + { + return SynthesisResult::unsupported("not SdpaBackwardAttributes"); + } + + tracker.fillFree(a->q_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->k_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->v_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->do_tensor_uid(), -1.0f, 1.0f, rng); + tracker.fillFree(a->scale_tensor_uid().value_or(0), 0.1f, 1.0f, rng); + tracker.fillFree(a->dropout_scale_tensor_uid().value_or(0), 0.1f, 1.0f, rng); + tracker.fillFree(a->dropout_scale_inv_tensor_uid().value_or(0), 0.1f, 1.0f, rng); + tracker.fillFree(a->attn_mask_tensor_uid().value_or(0), -1.0f, 1.0f, rng); + + tracker.markDerived(a->o_tensor_uid(), "o (forward output)"); + tracker.markDerived(a->stats_tensor_uid(), "stats (forward softmax stats)"); + + tracker.markStructured(a->seq_len_q_tensor_uid().value_or(0), "seq_len_q"); + tracker.markStructured(a->seq_len_kv_tensor_uid().value_or(0), "seq_len_kv"); + tracker.markStructured(a->seed_tensor_uid().value_or(0), "dropout_seed"); + tracker.markStructured(a->offset_tensor_uid().value_or(0), "dropout_offset"); + + return SynthesisResult::ok(); +} + +// ── Dispatch ────────────────────────────────────────────────────────────────── +// Routes a node to its fill function based on the flatbuffer attribute type. +// The harness calls this once per node in the graph — for a fused graph like +// conv+bias+relu, each node is dispatched separately with only its own inputs. +// Returns ok() when all of the node's inputs were filled, or unsupported() with +// a diagnostic when the op is unrecognized or an input can't be synthesized. + +inline SynthesisResult synthesizeNodeInputs(const hipdnn_flatbuffers_sdk::data_objects::Node& node, + SynthesisTracker& tracker, + std::mt19937& rng) +{ + using NA = hipdnn_flatbuffers_sdk::data_objects::NodeAttributes; + + switch(node.attributes_type()) + { + case NA::ConvolutionFwdAttributes: + return fillConvFwdInputs(node, tracker, rng); + case NA::ConvolutionBwdAttributes: + return fillConvBwdDataInputs(node, tracker, rng); + case NA::ConvolutionWrwAttributes: + return fillConvBwdWeightsInputs(node, tracker, rng); + case NA::BatchnormInferenceAttributes: + return fillBatchnormInferenceInputs(node, tracker, rng); + case NA::BatchnormInferenceAttributesVarianceExt: + return fillBatchnormInferenceVarianceInputs(node, tracker, rng); + case NA::BatchnormAttributes: + return fillBatchnormTrainingInputs(node, tracker, rng); + case NA::BatchnormBackwardAttributes: + return fillBatchnormBackwardInputs(node, tracker, rng); + case NA::MatmulAttributes: + return fillMatmulInputs(node, tracker, rng); + case NA::PointwiseAttributes: + return fillPointwiseInputs(node, tracker, rng); + case NA::ReductionAttributes: + return fillReductionInputs(node, tracker, rng); + case NA::LayernormAttributes: + return fillLayernormInputs(node, tracker, rng); + case NA::LayernormBackwardAttributes: + return fillLayernormBackwardInputs(node, tracker, rng); + case NA::RMSNormAttributes: + return fillRmsnormInputs(node, tracker, rng); + case NA::RMSNormBackwardAttributes: + return fillRmsnormBackwardInputs(node, tracker, rng); + case NA::ResampleFwdAttributes: + return fillResampleFwdInputs(node, tracker, rng); + case NA::BlockScaleDequantizeAttributes: + return fillBlockScaleDequantizeInputs(node, tracker, rng); + case NA::BlockScaleQuantizeAttributes: + return fillBlockScaleQuantizeInputs(node, tracker, rng); + case NA::SdpaAttributes: + return fillSdpaForwardInputs(node, tracker, rng); + case NA::SdpaBackwardAttributes: + return fillSdpaBackwardInputs(node, tracker, rng); + default: + return SynthesisResult::unsupported("no input synthesis registered for this op"); + } +} + +} // namespace hipdnn_integration_tests diff --git a/dnn-providers/integration-tests/src/harness/tolerance/ToleranceResolver.hpp b/dnn-providers/integration-tests/src/harness/tolerance/ToleranceResolver.hpp new file mode 100644 index 000000000000..2d28d835c5f5 --- /dev/null +++ b/dnn-providers/integration-tests/src/harness/tolerance/ToleranceResolver.hpp @@ -0,0 +1,250 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "harness/TomlGuards.hpp" + +// Shared default-tolerance resolution for both verification harnesses +// (ALMIOPEN-2216). Both the programmatic graph harness and the bundle harness +// reduce to the same question — "given a serialized graph and an output dtype, +// what default atol/rtol should the comparison use?" — so the policy lives here, +// keyed on the flatbuffer GraphWrapper, which is the common representation: the +// bundle harness already holds one, and the graph harness obtains it via +// Graph::to_binary(). +// +// This header owns POLICY only; the per-operation / per-dtype tolerance NUMBERS +// stay in hipdnn_test_sdk TestTolerances.hpp and are read, never modified. +// +// TODO(dynamic tolerance): the per-op tolerance source here is the FIXED table +// (TestTolerances.hpp). The codebase already ships a dynamic, shape/dtype-aware +// model — hipdnn_test_sdk DynamicTolerances.hpp + per-op DynamicTolerances{Matmul, +// Conv,BatchNorm,LayerNorm,RMSNorm,Sdpa,Pointwise}.hpp and +// pointwise/PointwiseErrorClassification.hpp — already wired into other test +// fixtures (conv gpu-ref, sdpa backward, cpu-executor plan tests). RFC 0011 +// §"Tolerance Framework" / "Future Work #1" defines the upgrade: replace the +// fixed level-3 default with DynamicTolerances, keyed on graph properties +// (op, dtype, tensor dims), without changing the three-level chain or this +// aggregation policy. When promoting, add a sibling aggregation function that +// routes through the existing DynamicTolerances functions instead of +// TestTolerances.hpp, and pass it to resolveTolerance; also add +// sub-bf16 dtypes (FP4) which the current DataType switch lacks (falls through to +// 1e-3). See ALMIOPEN-2216. +// +// Two policy decisions are encoded here, each kept independently evolvable: +// +// * Aggregation = max-across-nodes. The output tolerance is the loosest +// per-node tolerance in the graph. This is the conservative envelope: it can +// be too loose on a long fused chain but is never too tight, so it never +// manufactures a false failure. Root-op-only selection (the graph harness's +// prior heuristic) is unsafe — an upstream high-K / low-precision node +// dominates the error, so picking the "root" can under-tolerance and fail a +// correct kernel. A principled alternative (analytic error propagation along +// the producer chain) is the documented future upgrade; it needs per-op +// condition-number models and is deferred. +// +// * dtype key = the OUTPUT tensor's dtype (passed in by the caller). Truly +// per-node dtype keying — each node keyed on its own output-edge dtype — only +// differs from this in mixed-I/O fused graphs, and recovering a node's +// output dtype needs a per-op tensor-UID extractor (the flatbuffer Node +// carries only compute_data_type, not its I/O tensors). That extractor is +// the same machinery the per-output subgraph walk needs, so per-node dtype is +// deferred together with multi-output support (ALMIOPEN-2216). +// +// resolveTolerance() is the single entry point for both harnesses: it derives +// the max-across-nodes default and then applies the TOML per-test override (the +// highest-priority layer) in one place, so neither harness applies the override +// separately and the layering order lives here alone. + +namespace hipdnn_integration_tests::tolerance +{ + +namespace fb = hipdnn_flatbuffers_sdk::flatbuffer_utilities; +namespace data = hipdnn_flatbuffers_sdk::data_objects; + +// Per-op tolerance for one node attribute type, at a fixed element type T. +// Maps a flatbuffer NodeAttributes tag onto the corresponding TestTolerances.hpp +// entry. Unknown ops fall back to a conservative 1e-3. +template +inline float toleranceForNodeAttributes(data::NodeAttributes attrType) +{ + using NA = data::NodeAttributes; + namespace tol = hipdnn_test_sdk::utilities; + + switch(attrType) + { + case NA::ConvolutionFwdAttributes: + return tol::conv::getToleranceFwd(); + case NA::ConvolutionBwdAttributes: + return tol::conv::getToleranceBwd(); + case NA::ConvolutionWrwAttributes: + return tol::conv::getToleranceWrw(); + case NA::BatchnormInferenceAttributes: + return tol::batchnorm::getToleranceInference(); + case NA::BatchnormInferenceAttributesVarianceExt: + return tol::batchnorm::getToleranceInferenceWithVariance(); + case NA::BatchnormAttributes: + return tol::batchnorm::getToleranceTraining(); + case NA::BatchnormBackwardAttributes: + return tol::batchnorm::getToleranceBackward(); + case NA::MatmulAttributes: + return tol::matmul::getTolerance(); + case NA::ReductionAttributes: + return tol::reduction::getTolerance(); + case NA::RMSNormAttributes: + return tol::rmsnorm::getTolerance(); + case NA::PointwiseAttributes: + return tol::pointwise::getTolerance(); + case NA::LayernormAttributes: + return tol::layernorm::getTolerance(); + case NA::SdpaAttributes: + case NA::SdpaBackwardAttributes: + return tol::sdpa::getToleranceFwd(); + default: + return 1e-3f; + } +} + +// Dispatch the element-type template on a runtime DataType. +inline float toleranceForNode(data::NodeAttributes attrType, data::DataType dataType) +{ + using DT = data::DataType; + using hipdnn_data_sdk::types::bfloat16; + using hipdnn_data_sdk::types::half; + + switch(dataType) + { + case DT::FLOAT: + return toleranceForNodeAttributes(attrType); + case DT::HALF: + return toleranceForNodeAttributes(attrType); + case DT::BFLOAT16: + return toleranceForNodeAttributes(attrType); + default: + return 1e-3f; + } +} + +// An aggregation policy reduces the per-node tolerances of a graph to one +// default tolerance for an output. It is just a function (graph, dtype) -> float; +// new policies are added as new functions, and resolveTolerance() takes the +// chosen one as a parameter. No enum/switch — the policy IS the function. +using AggregationPolicy = float (*)(const fb::GraphWrapper&, data::DataType); + +// Conservative policy (the default): max-across-nodes — the loosest per-node +// tolerance in the graph. Never tighter than any single node, so it cannot +// manufacture a false failure; for a fused output (which genuinely accumulates +// error from every op on its chain) the loosest contributing op is the correct +// floor. Returns 1e-3 for a graph with no nodes. +inline float maxAcrossNodes(const fb::GraphWrapper& wrapper, data::DataType dataType) +{ + const auto nodeCount = wrapper.nodeCount(); + + bool found = false; + float maxTolerance = 0.0f; + for(uint32_t i = 0; i < nodeCount; ++i) + { + const auto attrType = wrapper.getNode(i).attributes_type(); + const float nodeTolerance = toleranceForNode(attrType, dataType); + maxTolerance = found ? std::max(maxTolerance, nodeTolerance) : nodeTolerance; + found = true; + } + + return found ? maxTolerance : 1e-3f; +} + +// Output-op policy: the tolerance of the last non-Pointwise node in topological +// order — i.e. the op that produces the graph's output. This reproduces the +// graph harness's historical getTolerance() behavior so the C++ graph tests keep +// their exact tolerances as they migrate. It is tighter than maxAcrossNodes only +// on fused chains whose loosest op is NOT the output op; for the common case +// (one real op + activation) the two policies are identical, since the activation +// is Pointwise (skipped) and the single real op is both loosest and last. +// +// NOTE: this is a heuristic, not a principled tight bound — it attributes the +// whole output's tolerance to one op and ignores upstream error accumulation. +// Kept only for migration parity; max remains the default everywhere else, and +// the principled tighten path is the future DynamicTolerances upgrade. Falls back +// to maxAcrossNodes if every node is Pointwise (no clear producing op). +inline float outputOpTolerance(const fb::GraphWrapper& wrapper, data::DataType dataType) +{ + const auto nodeCount = wrapper.nodeCount(); + + bool foundRoot = false; + data::NodeAttributes rootAttr = data::NodeAttributes::NONE; + for(uint32_t i = 0; i < nodeCount; ++i) + { + const auto attrType = wrapper.getNode(i).attributes_type(); + if(attrType != data::NodeAttributes::PointwiseAttributes) + { + rootAttr = attrType; // last non-Pointwise wins (topological order) + foundRoot = true; + } + } + + if(!foundRoot) + { + return maxAcrossNodes(wrapper, dataType); + } + return toleranceForNode(rootAttr, dataType); +} + +// Future policies live here as sibling functions, e.g.: +// float propagatedBound(wrapper, dtype); // analytic error propagation +// float dynamic(wrapper, dtype); // wired to DynamicTolerances.hpp +// Each is added without touching resolveTolerance or any caller — pass it in. + +// Warn (once per call site) when a graph has more than one output tensor. +// +// Every current aggregation policy reduces over the WHOLE graph, not the subgraph +// that produces a given output: maxAcrossNodes takes the loosest of all nodes, +// outputOpTolerance takes the single last non-Pointwise node. For a multi-output +// graph neither is scoped to the output being toleranced, so a tolerance may be +// attributed from an unrelated branch. The precise fix (per-output subgraph +// scoping) is deferred together with per-node dtype keying (ALMIOPEN-2216), +// because both need a per-op tensor-UID extractor. Until then we surface the +// imprecision loudly rather than letting it pass silently. +inline void warnIfMultipleOutputs(std::size_t outputCount, const char* context) +{ + if(outputCount > 1) + { + HIPDNN_PLUGIN_LOG_WARN(context + << ": graph has " << outputCount + << " output tensors; tolerance is reduced over the whole graph, not " + "the per-output subgraph (deferred, ALMIOPEN-2216)"); + } +} + +// Resolve the FINAL absolute/relative tolerance for an output tensor of the +// given dtype: the chosen aggregation policy's default (max-across-nodes unless +// overridden), then the TOML per-test override (highest priority) applied on top. +// This is the single tolerance entry point for both harnesses — neither applies +// the override separately, so the layering order (default -> override) lives in +// exactly one place. The aggregation policy is a parameter (default +// maxAcrossNodes) so a caller can select a different policy without any change +// here. +inline void resolveTolerance(const fb::GraphWrapper& wrapper, + data::DataType dataType, + const std::string& testName, + float& atol, + float& rtol, + AggregationPolicy aggregate = maxAcrossNodes) +{ + const float defaultTolerance = aggregate(wrapper, dataType); + atol = defaultTolerance; + rtol = defaultTolerance; + applyTomlToleranceOverride(testName, atol, rtol); +} + +} // namespace hipdnn_integration_tests::tolerance diff --git a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormBackward.cpp b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormBackward.cpp index fca914087f87..22289c0c4a02 100644 --- a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormBackward.cpp +++ b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormBackward.cpp @@ -127,29 +127,6 @@ class BatchnormBackward : public IntegrationGraphVerificationHarnessfillTensorWithRandomValues(-1.0f, 1.0f, seed); - bundle.tensors.at(BatchnormBwdTensorIds::DY_UID) - ->fillTensorWithRandomValues(-0.1f, 0.1f, seed); - bundle.tensors.at(BatchnormBwdTensorIds::SCALE_UID) - ->fillTensorWithRandomValues(-0.1f, 0.1f, seed); - - if(!CalcStats) - { - bundle.tensors.at(BatchnormBwdTensorIds::MEAN_UID) - ->fillTensorWithRandomValues(-0.1f, 0.1f, seed); - - bundle.tensors.at(BatchnormBwdTensorIds::INV_VARIANCE_UID) - ->fillTensorWithRandomValues(1.9f, 2.0f, seed); - } - } - void runGraphTest() override { const auto& testCase = this->GetParam(); diff --git a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardInferenceWithVariance.cpp b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardInferenceWithVariance.cpp index 46738378aadf..353948aabf8d 100644 --- a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardInferenceWithVariance.cpp +++ b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardInferenceWithVariance.cpp @@ -117,24 +117,6 @@ class BatchnormForwardInferenceWithVariance } protected: - void initializeBundle([[maybe_unused]] const graph::Graph& graph, - GraphTensorBundle& bundle, - unsigned int seed) override - { - bundle.sentinelFillOutputTensors(); - - bundle.tensors.at(BnInfVarTensorIds::X_UID)->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - bundle.tensors.at(BnInfVarTensorIds::MEAN_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - // Variance must be non-negative; use positive range - bundle.tensors.at(BnInfVarTensorIds::VARIANCE_UID) - ->fillTensorWithRandomValues(0.1f, 1.0f, seed); - bundle.tensors.at(BnInfVarTensorIds::SCALE_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - bundle.tensors.at(BnInfVarTensorIds::BIAS_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - } - void runGraphTest() override { const auto& testCase = this->GetParam(); diff --git a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardTraining.cpp b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardTraining.cpp index 979ad80feb5a..f50cc84e4d88 100644 --- a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardTraining.cpp +++ b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormForwardTraining.cpp @@ -208,41 +208,6 @@ class BatchnormForwardTraining } protected: - void initializeBundle([[maybe_unused]] const graph::Graph& graph, - GraphTensorBundle& bundle, - unsigned int seed) override - { - bundle.sentinelFillOutputTensors(); - - // Note: Epsilon and momentum are pass-by-value (set via set_value()), not buffers - - // X input: default range - bundle.tensors.at(BatchnormFwdTrainingTensorIds::X_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - - // Scale and bias: -2.0 to 2.0 to match MIOpen - bundle.tensors.at(BatchnormFwdTrainingTensorIds::SCALE_UID) - ->fillTensorWithRandomValues(-2.0f, 2.0f, seed + 1); - bundle.tensors.at(BatchnormFwdTrainingTensorIds::BIAS_UID) - ->fillTensorWithRandomValues(-2.0f, 2.0f, seed + 2); - - // Running mean: only initialize PREV (input), leave NEXT (output) with sentinel - if(bundle.tensors.find(BatchnormFwdTrainingTensorIds::PREV_RUNNING_MEAN_UID) - != bundle.tensors.end()) - { - bundle.tensors.at(BatchnormFwdTrainingTensorIds::PREV_RUNNING_MEAN_UID) - ->fillTensorWithRandomValues(-2.0f, 2.0f, seed + 1000); - } - - // Running variance: only initialize PREV (input), leave NEXT (output) with sentinel - if(bundle.tensors.find(BatchnormFwdTrainingTensorIds::PREV_RUNNING_VARIANCE_UID) - != bundle.tensors.end()) - { - bundle.tensors.at(BatchnormFwdTrainingTensorIds::PREV_RUNNING_VARIANCE_UID) - ->fillTensorWithRandomValues(-2.0f, 2.0f, seed + 2000); - } - } - void runGraphTest() override { const auto& testCase = this->GetParam(); diff --git a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormFwdInferenceVarianceActiv.cpp b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormFwdInferenceVarianceActiv.cpp index 3273a926df23..d136d07e90ce 100644 --- a/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormFwdInferenceVarianceActiv.cpp +++ b/dnn-providers/integration-tests/src/integration_tests/batchnorm/IntegrationGpuBatchnormFwdInferenceVarianceActiv.cpp @@ -149,25 +149,6 @@ class BatchnormFwdInferenceVarianceActiv } protected: - void initializeBundle([[maybe_unused]] const graph::Graph& graph, - GraphTensorBundle& bundle, - unsigned int seed) override - { - bundle.sentinelFillOutputTensors(); - - bundle.tensors.at(BnInfVarActivTensorIds::X_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - bundle.tensors.at(BnInfVarActivTensorIds::MEAN_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - // Variance must be non-negative; use positive range - bundle.tensors.at(BnInfVarActivTensorIds::VARIANCE_UID) - ->fillTensorWithRandomValues(0.1f, 1.0f, seed); - bundle.tensors.at(BnInfVarActivTensorIds::SCALE_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - bundle.tensors.at(BnInfVarActivTensorIds::BIAS_UID) - ->fillTensorWithRandomValues(-1.0f, 1.0f, seed); - } - void runGraphTest() override { const auto& testCase = this->GetParam(); diff --git a/dnn-providers/integration-tests/src/main.cpp b/dnn-providers/integration-tests/src/main.cpp index 47645cdbd9eb..a2b787ca18f7 100644 --- a/dnn-providers/integration-tests/src/main.cpp +++ b/dnn-providers/integration-tests/src/main.cpp @@ -21,7 +21,8 @@ #include "harness/SharedHandle.hpp" #include "harness/SupportMatrixCollector.hpp" #include "harness/TestConfig.hpp" -#include "harness/golden/BundleRegistration.hpp" +#include "harness/bundle/BundleRegistration.hpp" +#include "harness/bundle/UnverifiableBundleReport.hpp" namespace { @@ -96,10 +97,17 @@ int main(int argc, char** argv) noexcept .implicit_value(true) .help("Enable golden reference bundle test registration. " "Can also be set via HIPDNN_TEST_ALLOW_BUNDLES=1 env var."); - parser.add_argument("--golden-data-dir") + parser.add_argument("--gd", "--golden-data-dir") .help("Path to the integration test bundle data directory. " "Defaults to /../lib/integration_test_bundles/. " "Can also be set via HIPDNN_TEST_GOLDEN_DATA_DIR env var."); + // --verification-mode governs BUNDLE tests (how the engine's output is + // verified). It is independent of --reference-executor, which governs the + // parameterized tests (which ref executor is exercised as the SUT). + parser.add_argument("--vm", "--verification-mode") + .help("How bundle engine output is verified: 'auto' (default; golden -> " + "GPU ref -> CPU ref -> skip), 'golden', 'gpu', or 'cpu'. " + "Can also be set via HIPDNN_TEST_VERIFICATION_MODE env var."); std::vector remainingArgs; try @@ -169,6 +177,22 @@ int main(int argc, char** argv) noexcept goldenDataDir = parser.get("--golden-data-dir"); } + // Parse --verification-mode (case-insensitive); invalid value -> exit 1. + std::optional verificationMode; + if(parser.is_used("--verification-mode")) + { + try + { + verificationMode = hipdnn_integration_tests::parseVerificationMode( + parser.get("--verification-mode")); + } + catch(const std::exception& e) + { + std::cerr << "Error: " << e.what() << '\n'; + return 1; + } + } + // Parse --test-article argument and load explicit plugin if provided std::optional articlePath; if(parser.is_used("--test-article")) @@ -211,7 +235,8 @@ int main(int argc, char** argv) noexcept std::move(configPath), refExecType, allowBundles, - std::move(goldenDataDir)); + std::move(goldenDataDir), + verificationMode); // Reconstruct argc/argv for GTest from remaining (unknown) args. // argv[0] (program name) must be first — GTest requires it. @@ -267,10 +292,14 @@ int main(int argc, char** argv) noexcept return 1; } - hipdnn_integration_tests::golden::registerBundleTests(); + hipdnn_integration_tests::bundle::registerBundleTests(); const int result = RUN_ALL_TESTS(); + // Print bundles that ended without a verdict (no oracle / reference bug). + // Informational only — these SKIP, so they do not affect `result`. + hipdnn_integration_tests::bundle::UnverifiableBundleReport::get().print(); + // Generate support matrix if requested if(hipdnn_integration_tests::SupportMatrixCollector::get().isEnabled()) { diff --git a/dnn-providers/integration-tests/tests/CMakeLists.txt b/dnn-providers/integration-tests/tests/CMakeLists.txt index eeb4e99d84bc..9bd2c2970847 100644 --- a/dnn-providers/integration-tests/tests/CMakeLists.txt +++ b/dnn-providers/integration-tests/tests/CMakeLists.txt @@ -3,6 +3,7 @@ add_executable(hipdnn_integration_tests_unit_tests main.cpp + ../src/harness/bundle/IntegrationBundleVerificationHarness.cpp TestArchMatch.cpp TestBundleMetadata.cpp TestGraphDescription.cpp @@ -17,7 +18,10 @@ add_executable(hipdnn_integration_tests_unit_tests TestReferenceGraphExecutorFactory.cpp TestBundleDiscovery.cpp TestVerificationPaths.cpp - TestGoldenVerificationHarness.cpp + TestBundleVerificationHarness.cpp + TestSynthesisTracker.cpp + TestSynthesizeInputs.cpp + TestVerificationModePaths.cpp ) target_include_directories(hipdnn_integration_tests_unit_tests diff --git a/dnn-providers/integration-tests/tests/TestBundleDiscovery.cpp b/dnn-providers/integration-tests/tests/TestBundleDiscovery.cpp index f3b8b8179c8d..513dfd4be142 100644 --- a/dnn-providers/integration-tests/tests/TestBundleDiscovery.cpp +++ b/dnn-providers/integration-tests/tests/TestBundleDiscovery.cpp @@ -12,10 +12,10 @@ #include #include -#include "harness/golden/BundleDiscovery.hpp" -#include "harness/golden/IntegrationTestBundle.hpp" +#include "harness/bundle/BundleDiscovery.hpp" +#include "harness/bundle/IntegrationTestBundle.hpp" -using namespace hipdnn_integration_tests::golden; +using namespace hipdnn_integration_tests::bundle; // NOLINTBEGIN(readability-identifier-naming) @@ -63,9 +63,10 @@ class TestBundleDiscoveryFixture : public ::testing::Test R"("compute_data_type": "float", "intermediate_data_type": "float", "name": ""})"; } - // Writes a valid {name}.meta.json companion. Metadata is mandatory for any - // bundle expected to load successfully (loadIntegrationTestBundle returns - // LoadError::MISSING_METADATA without it). + // Writes a valid {name}.meta.json companion. Metadata is mandatory for a + // golden bundle (one shipping output .bin blobs) — loadIntegrationTestBundle + // returns LoadError::MISSING_METADATA for those without it — and optional for + // a no-golden / graph-only bundle. static void writeMetadata(const std::filesystem::path& dir, const std::string& name) { std::ofstream(dir / (name + ".meta.json")) @@ -143,11 +144,14 @@ TEST_F(TestBundleDiscoveryFixture, TieredGoldenDataLayoutIsDiscovered) EXPECT_EQ(result.front().testName, "Small"); } -TEST_F(TestBundleDiscoveryFixture, JsonAtRootThrows) +TEST_F(TestBundleDiscoveryFixture, JsonAtRootUsesFolderNameAsSuite) { - // A .json directly at the data root has no folder to form a suite -> throw. + // A .json directly at the data root uses the root folder name as suite. std::ofstream(_tempDir / "graph.json") << R"({"tensors": []})"; - EXPECT_THROW(discoverBundles(_tempDir), std::runtime_error); + auto result = discoverBundles(_tempDir); + ASSERT_EQ(result.size(), 1u); + EXPECT_EQ(result[0].suiteName, sanitizeForGtest(_tempDir.filename().string())); + EXPECT_EQ(result[0].testName, "graph"); } TEST_F(TestBundleDiscoveryFixture, EmptyLeafFolderWarnsAndSkips) @@ -339,14 +343,33 @@ TEST_F(TestBundleDiscoveryFixture, LoadBundlePopulatesMetadataWhenPresent) EXPECT_EQ(*bundle.metadata.seed, 42); } -// A valid-graph bundle WITHOUT a .meta.json companion is a load error: metadata -// is mandatory. -TEST_F(TestBundleDiscoveryFixture, LoadBundleMissingMetadataIsError) +// A graph-only bundle (no .bin blobs, hence no golden data) without a .meta.json +// companion loads successfully: metadata validates golden data, and there is +// none here, so absent metadata is valid and default-constructed. +TEST_F(TestBundleDiscoveryFixture, LoadGraphOnlyBundleMissingMetadataLoads) { auto dir = _tempDir / "op" / "nometa"; - createMinimalBundle(dir, "nometa"); // graph only, no .meta.json + createMinimalBundle(dir, "nometa"); // graph only, no .meta.json, no .bin const auto jsonPath = dir / "nometa.json"; + auto result = loadIntegrationTestBundle(jsonPath); + ASSERT_TRUE(std::holds_alternative(result)); + const auto& bundle = std::get(result); + + EXPECT_FALSE(bundle.tensors.has_value()); // graph-only: no tensor data + EXPECT_FALSE(bundle.hasGoldenOutputs); + EXPECT_FALSE(bundle.metadata.operation.has_value()); // default-constructed +} + +// A GOLDEN bundle (output .bin blobs present) WITHOUT a .meta.json companion is +// a load error: metadata is mandatory whenever there is golden data to validate. +TEST_F(TestBundleDiscoveryFixture, LoadGoldenBundleMissingMetadataIsError) +{ + auto dir = _tempDir / "op" / "goldennometa"; + createLoadableBundle(dir, "goldennometa"); // writes .bin (inputs+outputs) + meta + std::filesystem::remove(dir / "goldennometa.meta.json"); // drop the metadata + const auto jsonPath = dir / "goldennometa.json"; + auto result = loadIntegrationTestBundle(jsonPath); ASSERT_TRUE(std::holds_alternative(result)); EXPECT_EQ(std::get(result), LoadError::MISSING_METADATA); @@ -359,7 +382,7 @@ TEST_F(TestBundleDiscoveryFixture, LoadBundleMissingBinIsGraphOnly) { auto dir = _tempDir / "op" / "nobin"; createMinimalBundle(dir, "nobin"); - writeMetadata(dir, "nobin"); // metadata is mandatory even for graph-only + writeMetadata(dir, "nobin"); // metadata present (optional here, but exercised) const auto jsonPath = dir / "nobin.json"; auto result = loadIntegrationTestBundle(jsonPath); diff --git a/dnn-providers/integration-tests/tests/TestGoldenVerificationHarness.cpp b/dnn-providers/integration-tests/tests/TestBundleVerificationHarness.cpp similarity index 94% rename from dnn-providers/integration-tests/tests/TestGoldenVerificationHarness.cpp rename to dnn-providers/integration-tests/tests/TestBundleVerificationHarness.cpp index 95dcfb6ea887..cffe75ed9383 100644 --- a/dnn-providers/integration-tests/tests/TestGoldenVerificationHarness.cpp +++ b/dnn-providers/integration-tests/tests/TestBundleVerificationHarness.cpp @@ -1,7 +1,7 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Unit tests for IntegrationGraphGoldenReferenceVerificationHarness's core +// Unit tests for IntegrationBundleVerificationHarness's core // contract: how it translates an executor's behaviour into a GTest outcome. // // executor throws (unsupported graph) -> SKIP @@ -27,12 +27,12 @@ #include -#include "harness/golden/IntegrationGraphGoldenReferenceVerificationHarness.hpp" -#include "harness/golden/IntegrationTestBundle.hpp" +#include "harness/bundle/IntegrationBundleVerificationHarness.hpp" +#include "harness/bundle/IntegrationTestBundle.hpp" // NOLINTBEGIN(readability-identifier-naming) -using namespace hipdnn_integration_tests::golden; +using namespace hipdnn_integration_tests::bundle; namespace { @@ -40,19 +40,19 @@ namespace // Exposes the harness's protected SetUp/TestBody so a test can drive the full // lifecycle directly, and overrides executeGraphThroughEngine with a stub so the // tests run on CPU-only CI without a real GPU engine. -class TestableHarness : public IntegrationGraphGoldenReferenceVerificationHarness +class TestableHarness : public IntegrationBundleVerificationHarness { public: using StubFunc = std::function&)>; explicit TestableHarness(StubFunc stub) - : IntegrationGraphGoldenReferenceVerificationHarness(/*requiresDevice=*/false) + : IntegrationBundleVerificationHarness(/*requiresDevice=*/false) , _stub(std::move(stub)) { } - using IntegrationGraphGoldenReferenceVerificationHarness::SetUp; - using IntegrationGraphGoldenReferenceVerificationHarness::TestBody; + using IntegrationBundleVerificationHarness::SetUp; + using IntegrationBundleVerificationHarness::TestBody; protected: void executeGraphThroughEngine(std::unordered_map& variantPack) override diff --git a/dnn-providers/integration-tests/tests/TestSynthesisTracker.cpp b/dnn-providers/integration-tests/tests/TestSynthesisTracker.cpp new file mode 100644 index 000000000000..ac6e3e930d7c --- /dev/null +++ b/dnn-providers/integration-tests/tests/TestSynthesisTracker.cpp @@ -0,0 +1,175 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include + +#include + +#include "harness/input_init/SynthesisTracker.hpp" + +// NOLINTBEGIN(readability-identifier-naming) + +using namespace hipdnn_integration_tests; + +namespace +{ + +InputTensorMap makeTensors(const std::vector& uids) +{ + InputTensorMap map; + for(const int64_t uid : uids) + { + map[uid] = std::make_unique>( + std::vector{2, 3}, std::vector{3, 1}); + map[uid]->fillTensorWithValue(0.f); + } + return map; +} + +} // namespace + +// All owned inputs declared FREE -> ok(). +TEST(TestSynthesisTracker, AllFreeSucceeds) +{ + auto inputs = makeTensors({1, 2, 3}); + const std::vector owned = {1, 2, 3}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.fillFree(2, -1.f, 1.f, rng); + tracker.fillFree(3, -1.f, 1.f, rng); + + const auto result = tracker.finish("TestOp"); + EXPECT_TRUE(result.filled); +} + +// An owned input left undeclared -> unsupported(). +TEST(TestSynthesisTracker, UndeclaredInputFails) +{ + auto inputs = makeTensors({1, 2, 3}); + const std::vector owned = {1, 2, 3}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + // uid 2 and 3 never declared + + const auto result = tracker.finish("TestOp"); + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("uid=2"), std::string::npos); + EXPECT_NE(result.reason.find("uid=3"), std::string::npos); +} + +// A STRUCTURED input -> unsupported() with diagnostic. +TEST(TestSynthesisTracker, StructuredInputFails) +{ + auto inputs = makeTensors({1, 2}); + const std::vector owned = {1, 2}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.markStructured(2, "page_table"); + + const auto result = tracker.finish("TestOp"); + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("page_table"), std::string::npos); + EXPECT_NE(result.reason.find("structured"), std::string::npos); +} + +// A DERIVED input -> unsupported() with diagnostic. +TEST(TestSynthesisTracker, DerivedInputFails) +{ + auto inputs = makeTensors({1, 2}); + const std::vector owned = {1, 2}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.markDerived(2, "forward_output"); + + const auto result = tracker.finish("TestOp"); + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("forward_output"), std::string::npos); + EXPECT_NE(result.reason.find("derived"), std::string::npos); +} + +// uid 0 (absent optional tensor) is silently ignored, not treated as owned. +TEST(TestSynthesisTracker, ZeroUidIgnored) +{ + auto inputs = makeTensors({1}); + const std::vector owned = {1}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.markStructured(0, "absent_optional"); + + const auto result = tracker.finish("TestOp"); + EXPECT_TRUE(result.filled); +} + +// A uid not in the owned set is silently ignored. +TEST(TestSynthesisTracker, NonOwnedUidIgnored) +{ + auto inputs = makeTensors({1, 99}); + const std::vector owned = {1}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.fillFree(99, -1.f, 1.f, rng); // not owned, ignored + + const auto result = tracker.finish("TestOp"); + EXPECT_TRUE(result.filled); +} + +// Empty owned set -> ok() trivially (no inputs to account for). +TEST(TestSynthesisTracker, EmptyOwnedSucceeds) +{ + InputTensorMap inputs; + const std::vector owned; + + const SynthesisTracker tracker(owned, inputs); + + const auto result = tracker.finish("TestOp"); + EXPECT_TRUE(result.filled); +} + +// Mixed: some FREE, one STRUCTURED, one undeclared -> both problems reported. +TEST(TestSynthesisTracker, MixedFailuresReportAll) +{ + auto inputs = makeTensors({1, 2, 3}); + const std::vector owned = {1, 2, 3}; + std::mt19937 rng(42); + + SynthesisTracker tracker(owned, inputs); + tracker.fillFree(1, -1.f, 1.f, rng); + tracker.markStructured(2, "seed"); + // uid 3 undeclared + + const auto result = tracker.finish("TestOp"); + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("seed"), std::string::npos); + EXPECT_NE(result.reason.find("uid=3"), std::string::npos); +} + +// SynthesisResult::ok() and ::unsupported() factory methods. +TEST(TestSynthesisResult, FactoryMethods) +{ + const auto ok = SynthesisResult::ok(); + EXPECT_TRUE(ok.filled); + EXPECT_TRUE(ok.reason.empty()); + + const auto bad = SynthesisResult::unsupported("cannot synthesize X"); + EXPECT_FALSE(bad.filled); + EXPECT_EQ(bad.reason, "cannot synthesize X"); +} + +// NOLINTEND(readability-identifier-naming) diff --git a/dnn-providers/integration-tests/tests/TestSynthesizeInputs.cpp b/dnn-providers/integration-tests/tests/TestSynthesizeInputs.cpp new file mode 100644 index 000000000000..c9de5c4c1405 --- /dev/null +++ b/dnn-providers/integration-tests/tests/TestSynthesizeInputs.cpp @@ -0,0 +1,434 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "harness/input_init/SynthesizeInputs.hpp" + +// NOLINTBEGIN(readability-identifier-naming) + +using namespace hipdnn_flatbuffers_sdk::data_objects; +using namespace hipdnn_integration_tests; + +namespace +{ + +const std::vector kDims = {2, 3}; +const std::vector kStrides = {3, 1}; + +InputTensorMap makeTensors(const std::vector& uids) +{ + InputTensorMap map; + for(const int64_t uid : uids) + { + map[uid] = std::make_unique>(kDims, kStrides); + map[uid]->fillTensorWithValue(0.f); + } + return map; +} + +struct GraphResult +{ + flatbuffers::FlatBufferBuilder builder; + const Graph* graph = nullptr; + + const Node& node(uint32_t i) const + { + return *graph->nodes()->Get(i); + } + + std::vector leafInputUids(const std::set& outputUids) const + { + std::vector uids; + for(const auto* t : *graph->tensors()) + { + if(!t->virtual_() && outputUids.count(t->uid()) == 0) + { + uids.push_back(t->uid()); + } + } + return uids; + } +}; + +// ── Conv fwd (single node) ────────────────────────────────────────────────── + +GraphResult buildConvFwdGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "x", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "w", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 3, "y", DataType::FLOAT, &kStrides, &kDims)); + + auto conv = CreateConvolutionFwdAttributesDirect(b, 1, 2, 3); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "conv", DataType::FLOAT, NodeAttributes::ConvolutionFwdAttributes, conv.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── Conv + bias (2-node fused) ────────────────────────────────────────────── +// conv.y (uid 10) is virtual; bias (uid 4) is leaf + +GraphResult buildConvBiasGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "x", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "w", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 10, "conv_y", DataType::FLOAT, &kStrides, &kDims, true)); + tensors.push_back( + CreateTensorAttributesDirect(b, 4, "bias", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 5, "out", DataType::FLOAT, &kStrides, &kDims)); + + auto conv = CreateConvolutionFwdAttributesDirect(b, 1, 2, 10); + auto add = CreatePointwiseAttributes(b, + PointwiseMode::ADD, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + 10, + 4, + flatbuffers::nullopt, + 5); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "conv", DataType::FLOAT, NodeAttributes::ConvolutionFwdAttributes, conv.Union())); + nodes.push_back(CreateNodeDirect( + b, "bias_add", DataType::FLOAT, NodeAttributes::PointwiseAttributes, add.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── Conv + bias + relu (3-node fused) ─────────────────────────────────────── +// conv.y (uid 10) virtual, bias_add.out (uid 11) virtual, relu.in_0=uid 11, relu.out_0=uid 6 + +GraphResult buildConvBiasReluGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "x", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "w", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 10, "conv_y", DataType::FLOAT, &kStrides, &kDims, true)); + tensors.push_back( + CreateTensorAttributesDirect(b, 4, "bias", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 11, "bias_out", DataType::FLOAT, &kStrides, &kDims, true)); + tensors.push_back( + CreateTensorAttributesDirect(b, 6, "out", DataType::FLOAT, &kStrides, &kDims)); + + auto conv = CreateConvolutionFwdAttributesDirect(b, 1, 2, 10); + auto add = CreatePointwiseAttributes(b, + PointwiseMode::ADD, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + 10, + 4, + flatbuffers::nullopt, + 11); + auto relu = CreatePointwiseAttributes(b, + PointwiseMode::RELU_FWD, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + 11, + flatbuffers::nullopt, + flatbuffers::nullopt, + 6); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "conv", DataType::FLOAT, NodeAttributes::ConvolutionFwdAttributes, conv.Union())); + nodes.push_back(CreateNodeDirect( + b, "bias_add", DataType::FLOAT, NodeAttributes::PointwiseAttributes, add.Union())); + nodes.push_back(CreateNodeDirect( + b, "relu", DataType::FLOAT, NodeAttributes::PointwiseAttributes, relu.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── SDPA forward (no structured optionals) ────────────────────────────────── + +GraphResult buildSdpaFwdGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "q", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "k", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 3, "v", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 4, "o", DataType::FLOAT, &kStrides, &kDims)); + + auto sdpa = CreateSdpaAttributes(b, 1, 2, 3, 4); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "sdpa_fwd", DataType::FLOAT, NodeAttributes::SdpaAttributes, sdpa.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── SDPA forward with structured seq_len_q ────────────────────────────────── + +GraphResult buildSdpaFwdWithStructuredGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "q", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "k", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 3, "v", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 4, "o", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 5, "seq_len_q", DataType::FLOAT, &kStrides, &kDims)); + + auto sdpa = CreateSdpaAttributes(b, + 1, + 2, + 3, + 4, + flatbuffers::nullopt, // attn_mask + flatbuffers::nullopt, // scale + 5); // seq_len_q + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "sdpa_fwd", DataType::FLOAT, NodeAttributes::SdpaAttributes, sdpa.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── SDPA backward standalone ──────────────────────────────────────────────── +// O and stats are leaf inputs (not virtual) → DERIVED → refuses + +GraphResult buildSdpaBwdStandaloneGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "q", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "k", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 3, "v", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 4, "o", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 5, "do", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 6, "stats", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 7, "dq", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 8, "dk", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 9, "dv", DataType::FLOAT, &kStrides, &kDims)); + + auto bwd = CreateSdpaBackwardAttributes(b, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "sdpa_bwd", DataType::FLOAT, NodeAttributes::SdpaBackwardAttributes, bwd.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +// ── SDPA fwd+bwd fused ───────────────────────────────────────────────────── +// O (uid 10) and stats (uid 11) are virtual inter-node tensors. +// Leaf inputs: Q(1), K(2), V(3) from fwd + dO(5) from bwd. +// Outputs: dQ(7), dK(8), dV(9). + +GraphResult buildSdpaFwdBwdFusedGraph() +{ + GraphResult r; + auto& b = r.builder; + + std::vector> tensors; + tensors.push_back(CreateTensorAttributesDirect(b, 1, "q", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 2, "k", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 3, "v", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back( + CreateTensorAttributesDirect(b, 10, "o", DataType::FLOAT, &kStrides, &kDims, true)); + tensors.push_back( + CreateTensorAttributesDirect(b, 11, "stats", DataType::FLOAT, &kStrides, &kDims, true)); + tensors.push_back(CreateTensorAttributesDirect(b, 5, "do", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 7, "dq", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 8, "dk", DataType::FLOAT, &kStrides, &kDims)); + tensors.push_back(CreateTensorAttributesDirect(b, 9, "dv", DataType::FLOAT, &kStrides, &kDims)); + + auto fwd = CreateSdpaAttributes(b, + 1, + 2, + 3, + 10, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + flatbuffers::nullopt, + 11); // stats_tensor_uid + + auto bwd = CreateSdpaBackwardAttributes(b, 1, 2, 3, 10, 5, 11, 7, 8, 9); + + std::vector> nodes; + nodes.push_back(CreateNodeDirect( + b, "sdpa_fwd", DataType::FLOAT, NodeAttributes::SdpaAttributes, fwd.Union())); + nodes.push_back(CreateNodeDirect( + b, "sdpa_bwd", DataType::FLOAT, NodeAttributes::SdpaBackwardAttributes, bwd.Union())); + + auto graph = CreateGraphDirect( + b, "test", DataType::FLOAT, DataType::FLOAT, DataType::FLOAT, &tensors, &nodes); + b.Finish(graph); + + r.graph = GetGraph(b.GetBufferPointer()); + return r; +} + +SynthesisResult runSynthesis(const GraphResult& gr, const std::set& outputUids) +{ + const auto leafUids = gr.leafInputUids(outputUids); + auto inputs = makeTensors(leafUids); + std::mt19937 rng(42); + + SynthesisTracker tracker(leafUids, inputs); + for(uint32_t i = 0; i < gr.graph->nodes()->size(); ++i) + { + const SynthesisResult nodeResult + = synthesizeNodeInputs(*gr.graph->nodes()->Get(i), tracker, rng); + if(!nodeResult.filled) + { + return nodeResult; + } + } + return tracker.finish("test"); +} + +} // namespace + +// ── Test cases ────────────────────────────────────────────────────────────── + +TEST(TestSynthesizeInputs, SingleConvFwd) +{ + const auto gr = buildConvFwdGraph(); + const auto result = runSynthesis(gr, {3}); + + EXPECT_TRUE(result.filled) << result.reason; +} + +TEST(TestSynthesizeInputs, ConvPlusBiasFused) +{ + const auto gr = buildConvBiasGraph(); + const auto result = runSynthesis(gr, {5}); + + EXPECT_TRUE(result.filled) << result.reason; +} + +TEST(TestSynthesizeInputs, ConvPlusBiasPlusReluFused) +{ + const auto gr = buildConvBiasReluGraph(); + const auto result = runSynthesis(gr, {6}); + + EXPECT_TRUE(result.filled) << result.reason; +} + +TEST(TestSynthesizeInputs, SdpaFwdNoStructuredOptionals) +{ + const auto gr = buildSdpaFwdGraph(); + const auto result = runSynthesis(gr, {4}); + + EXPECT_TRUE(result.filled) << result.reason; +} + +TEST(TestSynthesizeInputs, SdpaFwdWithStructuredInputRefuses) +{ + const auto gr = buildSdpaFwdWithStructuredGraph(); + const auto result = runSynthesis(gr, {4}); + + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("seq_len_q"), std::string::npos); + EXPECT_NE(result.reason.find("structured"), std::string::npos); +} + +TEST(TestSynthesizeInputs, SdpaBwdStandaloneRefusesDerived) +{ + const auto gr = buildSdpaBwdStandaloneGraph(); + const auto result = runSynthesis(gr, {7, 8, 9}); + + EXPECT_FALSE(result.filled); + EXPECT_NE(result.reason.find("derived"), std::string::npos); +} + +TEST(TestSynthesizeInputs, SdpaFwdBwdFusedSucceeds) +{ + const auto gr = buildSdpaFwdBwdFusedGraph(); + const auto result = runSynthesis(gr, {7, 8, 9}); + + EXPECT_TRUE(result.filled) << result.reason; +} + +// NOLINTEND(readability-identifier-naming) diff --git a/dnn-providers/integration-tests/tests/TestTestConfig.cpp b/dnn-providers/integration-tests/tests/TestTestConfig.cpp index 4f45e39c103a..f6da2da519f8 100644 --- a/dnn-providers/integration-tests/tests/TestTestConfig.cpp +++ b/dnn-providers/integration-tests/tests/TestTestConfig.cpp @@ -67,6 +67,72 @@ TEST(TestConfigUninitialized, GetReferenceExecutorTypeThrowsWhenUninitialized) EXPECT_THROW(TestConfig::get().getReferenceExecutorType(), std::runtime_error); } +// parseVerificationMode is a free function (no singleton state), so it can be +// exercised regardless of initialization. +TEST(TestParseVerificationMode, AcceptsAllValidValuesCaseInsensitive) +{ + using hipdnn_integration_tests::parseVerificationMode; + using hipdnn_integration_tests::VerificationMode; + + EXPECT_EQ(parseVerificationMode("auto"), VerificationMode::AUTO); + EXPECT_EQ(parseVerificationMode("golden"), VerificationMode::GOLDEN); + EXPECT_EQ(parseVerificationMode("gpu"), VerificationMode::GPU); + EXPECT_EQ(parseVerificationMode("cpu"), VerificationMode::CPU); + + EXPECT_EQ(parseVerificationMode("AUTO"), VerificationMode::AUTO); + EXPECT_EQ(parseVerificationMode("Golden"), VerificationMode::GOLDEN); + EXPECT_EQ(parseVerificationMode("GPU"), VerificationMode::GPU); +} + +TEST(TestParseVerificationMode, ThrowsOnInvalidValue) +{ + EXPECT_THROW(hipdnn_integration_tests::parseVerificationMode("bogus"), std::runtime_error); + EXPECT_THROW(hipdnn_integration_tests::parseVerificationMode(""), std::runtime_error); +} + +// resolveVerificationMode / resolveGoldenDataDir are free functions that +// implement the "CLI wins, then env, then nullopt" precedence chain. +// They don't touch the singleton so they can be tested freely. + +TEST(TestResolveVerificationMode, CliValueWinsOverEnv) +{ + using hipdnn_integration_tests::resolveVerificationMode; + using hipdnn_integration_tests::VerificationMode; + + // Even if the env var were set, the CLI value takes precedence. + const auto result = resolveVerificationMode(VerificationMode::GPU); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(*result, VerificationMode::GPU); +} + +TEST(TestResolveVerificationMode, NulloptCliWithoutEnvReturnsNullopt) +{ + using hipdnn_integration_tests::resolveVerificationMode; + + // Assuming HIPDNN_TEST_VERIFICATION_MODE is not set in the test env. + const auto result = resolveVerificationMode(std::nullopt); + EXPECT_FALSE(result.has_value()); +} + +TEST(TestResolveGoldenDataDir, CliValueWinsOverEnv) +{ + using hipdnn_integration_tests::resolveGoldenDataDir; + + const std::filesystem::path cliPath = "/explicit/golden/dir"; + const auto result = resolveGoldenDataDir(cliPath); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(*result, cliPath); +} + +TEST(TestResolveGoldenDataDir, NulloptCliWithoutEnvReturnsNullopt) +{ + using hipdnn_integration_tests::resolveGoldenDataDir; + + // Assuming HIPDNN_TEST_GOLDEN_DATA_DIR is not set in the test env. + const auto result = resolveGoldenDataDir(std::nullopt); + EXPECT_FALSE(result.has_value()); +} + // --------------------------------------------------------------------------- // Suite 2 – initialized singleton (all args provided) // --------------------------------------------------------------------------- @@ -129,6 +195,23 @@ TEST_F(TestConfigInitialized, GetReferenceExecutorTypeDefaultsToCpu) hipdnn_integration_tests::ReferenceExecutorType::CPU); } +TEST_F(TestConfigInitialized, GetVerificationModeDefaultsToAuto) +{ + // No CLI flag and (assuming) no env var -> AUTO. + EXPECT_EQ(TestConfig::get().getVerificationMode(), + hipdnn_integration_tests::VerificationMode::AUTO); +} + +TEST_F(TestConfigInitialized, HasGoldenDataDirReturnsFalseWhenNotProvided) +{ + EXPECT_FALSE(TestConfig::get().hasGoldenDataDir()); +} + +TEST_F(TestConfigInitialized, GetGoldenDataDirThrowsWhenNotProvided) +{ + EXPECT_THROW(TestConfig::get().getGoldenDataDir(), std::runtime_error); +} + TEST_F(TestConfigInitialized, DoubleInitializeThrows) { EXPECT_THROW(TestConfig::initialize(std::nullopt, std::nullopt), std::runtime_error); diff --git a/dnn-providers/integration-tests/tests/TestVerificationModePaths.cpp b/dnn-providers/integration-tests/tests/TestVerificationModePaths.cpp new file mode 100644 index 000000000000..be39945b972d --- /dev/null +++ b/dnn-providers/integration-tests/tests/TestVerificationModePaths.cpp @@ -0,0 +1,411 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +// Tests the verification mode dispatch logic in the harness: +// +// AUTO mode: golden → GPU ref → CPU ref → SKIP +// GOLDEN mode: golden or SKIP +// GPU/CPU mode: explicit ref or SKIP/FAIL +// +// Each test overrides getVerificationMode() and the executor stubs to exercise +// one branch without touching the TestConfig singleton. + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "harness/ReferenceCapabilityError.hpp" +#include "harness/TestConfig.hpp" +#include "harness/bundle/IntegrationBundleVerificationHarness.hpp" +#include "harness/bundle/IntegrationTestBundle.hpp" + +// NOLINTBEGIN(readability-identifier-naming) + +using namespace hipdnn_integration_tests; +using namespace hipdnn_integration_tests::bundle; + +namespace +{ + +using EngineStub = std::function&)>; +using RefStub = std::function&)>; + +class ModeTestableHarness : public IntegrationBundleVerificationHarness +{ +public: + ModeTestableHarness(VerificationMode mode, EngineStub engineStub, RefStub refStub) + : IntegrationBundleVerificationHarness(/*requiresDevice=*/false) + , _mode(mode) + , _engineStub(std::move(engineStub)) + , _refStub(std::move(refStub)) + { + } + + using IntegrationBundleVerificationHarness::SetUp; + using IntegrationBundleVerificationHarness::TestBody; + +protected: + VerificationMode getVerificationMode() const override + { + return _mode; + } + + void executeGraphThroughEngine(std::unordered_map& variantPack) override + { + _engineStub(variantPack); + } + + void runReferenceExecutor(ReferenceExecutorType type, + std::unordered_map& variantPack) override + { + _refStub(type, variantPack); + } + + std::unique_ptr + makeReferenceExecutor(ReferenceExecutorType /*type*/) override + { + return nullptr; + } + + // These tests exercise verification-mode dispatch, not the VRAM/arch + // hardware guards. Override to a no-op so they don't reach into the + // (uninitialized-in-this-binary) TestConfig singleton. + void applyMetadataGuards() const override {} + +private: + VerificationMode _mode; + EngineStub _engineStub; + RefStub _refStub; +}; + +class TestVerificationModePathsFixture : public ::testing::Test +{ +protected: + std::optional _scopedDir; + std::filesystem::path _tempDir; + + void SetUp() override + { + auto path + = std::filesystem::temp_directory_path() + / ("vmode_test_" + + std::to_string(::testing::UnitTest::GetInstance()->current_test_info()->line())); + std::filesystem::remove_all(path); + _scopedDir.emplace(path); + _tempDir = _scopedDir->path(); + } + + static constexpr float K_OUTPUT_VALUE = 3.5f; + static constexpr int64_t K_OUTPUT_UID = 5; + static constexpr size_t K_OUTPUT_ELEMS = 120; + + static void writeBundleFiles(const std::filesystem::path& dir, + const std::string& name, + bool includeGoldenOutput) + { + std::filesystem::create_directories(dir); + std::ofstream(dir / (name + ".json")) + << R"({"nodes": [{"inputs": {"x_tensor_uid": 0, "mean_tensor_uid": 1, )" + R"("inv_variance_tensor_uid": 2, "scale_tensor_uid": 3, "bias_tensor_uid": 4}, )" + R"("outputs": {"y_tensor_uid": 5}, "type": "BatchnormInferenceAttributes", )" + R"("compute_data_type": "float", "name": ""}], "tensors": [)" + R"({"name": "", "uid": 0, "strides": [60, 20, 5, 1], "dims": [2, 3, 4, 5], )" + R"("data_type": "float", "virtual": false}, )" + R"({"name": "", "uid": 1, "strides": [3, 1, 1, 1], "dims": [1, 3, 1, 1], )" + R"("data_type": "float", "virtual": false}, )" + R"({"name": "", "uid": 2, "strides": [3, 1, 1, 1], "dims": [1, 3, 1, 1], )" + R"("data_type": "float", "virtual": false}, )" + R"({"name": "", "uid": 3, "strides": [3, 1, 1, 1], "dims": [1, 3, 1, 1], )" + R"("data_type": "float", "virtual": false}, )" + R"({"name": "", "uid": 4, "strides": [3, 1, 1, 1], "dims": [1, 3, 1, 1], )" + R"("data_type": "float", "virtual": false}, )" + R"({"name": "", "uid": 5, "strides": [60, 20, 5, 1], "dims": [2, 3, 4, 5], )" + R"("data_type": "float", "virtual": false}], "io_data_type": "float", )" + R"("compute_data_type": "float", "intermediate_data_type": "float", "name": ""})"; + + std::ofstream(dir / (name + ".meta.json")) + << R"({"format_version": 1, "operation": "BatchnormInference"})"; + + const auto basePath = (dir / name).string(); + const auto writeFloatBin = [&](int64_t uid, size_t elems, float value) { + const std::vector data(elems, value); + std::ofstream out(basePath + ".tensor" + std::to_string(uid) + ".bin", + std::ios::binary); + out.write(reinterpret_cast(data.data()), + static_cast(data.size() * sizeof(float))); + }; + + writeFloatBin(0, 120, 0.0f); + writeFloatBin(1, 3, 0.0f); + writeFloatBin(2, 3, 0.0f); + writeFloatBin(3, 3, 0.0f); + writeFloatBin(4, 3, 0.0f); + + if(includeGoldenOutput) + { + writeFloatBin(K_OUTPUT_UID, K_OUTPUT_ELEMS, K_OUTPUT_VALUE); + } + } + + std::shared_ptr loadBundle(const std::string& name, + bool includeGoldenOutput) const + { + const auto dir = _tempDir / name; + writeBundleFiles(dir, name, includeGoldenOutput); + auto result = loadIntegrationTestBundle(dir / (name + ".json")); + EXPECT_TRUE(std::holds_alternative(result)); + return std::make_shared( + std::move(std::get(result))); + } + + static void writeOutput(std::unordered_map& variantPack, float value) + { + auto* ptr = static_cast(variantPack.at(K_OUTPUT_UID)); + std::fill(ptr, ptr + K_OUTPUT_ELEMS, value); + } + + static void runCapturing(std::shared_ptr bundle, + VerificationMode mode, + EngineStub engineStub, + RefStub refStub, + ::testing::TestPartResultArray* results) + { + ModeTestableHarness harness(mode, std::move(engineStub), std::move(refStub)); + harness.setBundle(std::move(bundle), "vmode-test-bundle"); + + const ::testing::ScopedFakeTestPartResultReporter reporter( + ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, results); + harness.SetUp(); + harness.TestBody(); + } + + static bool anySkipped(const ::testing::TestPartResultArray& results) + { + for(int i = 0; i < results.size(); ++i) + { + if(results.GetTestPartResult(i).skipped()) + { + return true; + } + } + return false; + } + + static bool anyFailed(const ::testing::TestPartResultArray& results) + { + for(int i = 0; i < results.size(); ++i) + { + if(results.GetTestPartResult(i).failed()) + { + return true; + } + } + return false; + } + + static EngineStub matchingEngine() + { + return [](std::unordered_map& vp) { writeOutput(vp, K_OUTPUT_VALUE); }; + } + + static EngineStub mismatchingEngine() + { + return [](std::unordered_map& vp) { + writeOutput(vp, K_OUTPUT_VALUE + 100.0f); + }; + } + + static RefStub matchingRef() + { + return [](ReferenceExecutorType, std::unordered_map& vp) { + writeOutput(vp, K_OUTPUT_VALUE); + }; + } + + static RefStub capabilityMissRef() + { + return [](ReferenceExecutorType, std::unordered_map&) { + throw ReferenceCapabilityError("stub: unsupported op"); + }; + } + + static RefStub gpuMissCpuMatchRef() + { + return [](ReferenceExecutorType type, std::unordered_map& vp) { + if(type == ReferenceExecutorType::GPU) + { + throw ReferenceCapabilityError("stub: no GPU ref plan"); + } + writeOutput(vp, K_OUTPUT_VALUE); + }; + } +}; + +} // namespace + +// ── AUTO mode ─────────────────────────────────────────────────────────────── + +TEST_F(TestVerificationModePathsFixture, AutoWithGoldenUsesGoldenAndPasses) +{ + ::testing::TestPartResultArray results; + bool refCalled = false; + runCapturing( + loadBundle("auto_golden", /*includeGoldenOutput=*/true), + VerificationMode::AUTO, + matchingEngine(), + [&](ReferenceExecutorType, std::unordered_map&) { refCalled = true; }, + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); + EXPECT_FALSE(refCalled) << "Reference executor should NOT run when golden data is present"; +} + +TEST_F(TestVerificationModePathsFixture, AutoWithGoldenMismatchFails) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("auto_golden_mm", /*includeGoldenOutput=*/true), + VerificationMode::AUTO, + mismatchingEngine(), + matchingRef(), + &results); + + EXPECT_TRUE(anyFailed(results)); +} + +TEST_F(TestVerificationModePathsFixture, AutoNoGoldenRefSucceedsPasses) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("auto_gpu", /*includeGoldenOutput=*/false), + VerificationMode::AUTO, + matchingEngine(), + matchingRef(), + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); +} + +TEST_F(TestVerificationModePathsFixture, AutoNoGoldenRefMissFallsThroughToCpu) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("auto_fallthrough", /*includeGoldenOutput=*/false), + VerificationMode::AUTO, + matchingEngine(), + gpuMissCpuMatchRef(), + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); +} + +TEST_F(TestVerificationModePathsFixture, AutoNoGoldenBothRefsMissSkips) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("auto_both_miss", /*includeGoldenOutput=*/false), + VerificationMode::AUTO, + matchingEngine(), + capabilityMissRef(), + &results); + + EXPECT_TRUE(anySkipped(results)); + EXPECT_FALSE(anyFailed(results)); +} + +// ── GOLDEN mode ───────────────────────────────────────────────────────────── + +TEST_F(TestVerificationModePathsFixture, GoldenModeWithDataPasses) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("golden_ok", /*includeGoldenOutput=*/true), + VerificationMode::GOLDEN, + matchingEngine(), + capabilityMissRef(), + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); +} + +TEST_F(TestVerificationModePathsFixture, GoldenModeWithoutDataSkips) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("golden_absent", /*includeGoldenOutput=*/false), + VerificationMode::GOLDEN, + matchingEngine(), + matchingRef(), + &results); + + EXPECT_TRUE(anySkipped(results)); + EXPECT_FALSE(anyFailed(results)); +} + +// ── Explicit GPU mode ─────────────────────────────────────────────────────── +// "Device" in these case names denotes VerificationMode::GPU (the device-side +// reference executor). The literal "Gpu" keyword is reserved by the test-name +// linter for the suite name and so cannot appear in the case name. + +TEST_F(TestVerificationModePathsFixture, DeviceModeRefSucceedsPasses) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("gpu_ok", /*includeGoldenOutput=*/true), + VerificationMode::GPU, + matchingEngine(), + matchingRef(), + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); +} + +TEST_F(TestVerificationModePathsFixture, DeviceModeCapabilityMissSkips) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("gpu_miss", /*includeGoldenOutput=*/true), + VerificationMode::GPU, + matchingEngine(), + capabilityMissRef(), + &results); + + EXPECT_TRUE(anySkipped(results)); + EXPECT_FALSE(anyFailed(results)); +} + +// ── Explicit CPU mode ─────────────────────────────────────────────────────── + +TEST_F(TestVerificationModePathsFixture, CpuModeRefSucceedsPasses) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("cpu_ok", /*includeGoldenOutput=*/true), + VerificationMode::CPU, + matchingEngine(), + matchingRef(), + &results); + + EXPECT_FALSE(anyFailed(results)); + EXPECT_FALSE(anySkipped(results)); +} + +TEST_F(TestVerificationModePathsFixture, CpuModeCapabilityMissSkips) +{ + ::testing::TestPartResultArray results; + runCapturing(loadBundle("cpu_miss", /*includeGoldenOutput=*/true), + VerificationMode::CPU, + matchingEngine(), + capabilityMissRef(), + &results); + + EXPECT_TRUE(anySkipped(results)); + EXPECT_FALSE(anyFailed(results)); +} + +// NOLINTEND(readability-identifier-naming) diff --git a/dnn-providers/integration-tests/tests/TestVerificationPaths.cpp b/dnn-providers/integration-tests/tests/TestVerificationPaths.cpp index 36a54fcbb5ec..b2d7ecb5473d 100644 --- a/dnn-providers/integration-tests/tests/TestVerificationPaths.cpp +++ b/dnn-providers/integration-tests/tests/TestVerificationPaths.cpp @@ -12,12 +12,12 @@ #include #include -#include "harness/golden/BundleDiscovery.hpp" +#include "harness/bundle/BundleDiscovery.hpp" #include "harness/gpu_graph_executor/GpuReferenceGraphExecutor.hpp" // NOLINTBEGIN(readability-identifier-naming) -using namespace hipdnn_integration_tests::golden; +using namespace hipdnn_integration_tests::bundle; namespace {