diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index b09caee5bca2ba..19121c70cc9a46 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -55,7 +55,7 @@ class ZeroInferRequest final : public SyncInferRequest { const bool isInput, const std::optional batchSize = std::nullopt) const; - void add_state(const IODescriptor& descriptor, size_t tensorIndex) const override; + void add_state(const IODescriptor& descriptor, size_t tensorIndex) const; void update_pipeline_if_memory_changed(); void update_states_if_memory_changed(); diff --git a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp index c667d24a288d30..4804385f2389ba 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp @@ -8,6 +8,7 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/ivariable_state.hpp" +#include "zero_tensor.hpp" namespace intel_npu { @@ -20,57 +21,66 @@ class ZeroVariableState final : public ov::IVariableState { public: explicit ZeroVariableState(const std::shared_ptr& init_structs, const std::string& name, - const ov::SoPtr& tensor, + const std::shared_ptr& zero_tensor, size_t tensor_index, size_t related_tensor_index, - const Config& config, - bool external_memory_standard_allocation_supported); + const Config& config); void set_state(const ov::SoPtr& new_state) override; void reset() override; + ov::SoPtr get_state() const override; + /** - * @brief Get input tensor index used internally for the state + * @brief Get user state to not change the state of the tensor through get_state() */ - size_t get_tensor_index() const; + ov::SoPtr get_user_state() const; /** - * @brief Get output tensor index used internally for the state - * @details The related tensors are defined by state input, state output pairs. + * @brief Get internal level zero tensor. It can be different than the user tensor in case the user set a tensor + * that cannot be imported. Used by the InferenceRequest to update the arguments of the pipeline. */ - size_t get_related_tensor_index() const; + std::shared_ptr get_zero_state() const; /** - * @brief Get acknowledgment if the tensor was updated + * @brief Get input tensor index used internally for the state */ - bool tensor_was_updated() const; + size_t get_tensor_index() const; /** - * @brief Reset tensor updated flag + * @brief Get output tensor index used internally for the state + * @details The related tensors are defined by state input, state output pairs. */ - void reset_tensor_updated_flag(); + size_t get_related_tensor_index() const; /** - * @brief Get acknowledgment if the zero tensor was updated - * @details In case the memory was allocated in the same level zero context update the zero tensor + * @brief Get acknowledgment if state was updated + * @details Used to check if the state's internal user tensor was updated. Actions might need to be taken by the + * InferenceRequest in that case. This flag can be cleared using clear_state_update_pending(). An update to the user + * tensor might not trigger an update of the level zero tensor as well. zero_state_update_pending() should be used + * to check if the level zero tensor was also updated. 
*/ - bool zero_tensor_should_be_updated() const; + bool state_update_pending() const; /** - * @brief Reset zero tensor updated flag + * @brief Reset state updated flag + * @details Must be used to reset the flag exposed through state_update_pending() */ - void reset_zero_tensor_updated_flag(); + void clear_state_update_pending(); /** - * @brief Get acknowledgment if the zero tensor can be imported + * @brief Get acknowledgment if the zero state was updated + * @details Used to signal that the state's internal zero tensor was also updated. Actions might need to be taken by + * the InferenceRequest in that case. This flag can be cleared using clear_zero_state_update_pending(). */ - bool zero_tensor_should_be_imported() const; + bool zero_state_update_pending() const; /** - * @brief Reset zero tensor imported flag + * @brief Reset zero state updated flag + * @details Must be used to reset the flag exposed through zero_state_update_pending() */ - void reset_tensor_imported_flag(); + void clear_zero_state_update_pending(); ~ZeroVariableState() override = default; @@ -79,11 +89,10 @@ class ZeroVariableState final : public ov::IVariableState { size_t _tensor_index; size_t _related_tensor_index; - bool _tensor_updated = false; - bool _zero_tensor_updated = false; - bool _tensor_should_be_imported = false; + std::shared_ptr _zero_state; - bool _external_memory_standard_allocation_supported = false; + bool _is_state_updated = false; + bool _is_zero_state_update_needed = false; const Config _config; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index e5082fc661e823..3d2df2bbf05212 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -120,6 +120,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& for (const IODescriptor& inputDescriptor : _metadata.inputs) { check_level_zero_attributes_match(inputDescriptor, _graphInputDescriptors.at(ioIndex)); + // Tensors for regular inputs will be allocated later, only for ports that were not set by the user. + // Allocating only tensors for shapes and states. if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { ++ioIndex; continue; @@ -127,6 +129,10 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& get_level_zero_input(ioIndex) = allocate_tensor(ioIndex, INPUT); + if (inputDescriptor.isStateInput) { + add_state(inputDescriptor, ioIndex); + } + ++ioIndex; } @@ -134,11 +140,27 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& for (const IODescriptor& outputDescriptor : _metadata.outputs) { check_level_zero_attributes_match(outputDescriptor, _graphOutputDescriptors.at(ioIndex)); + // Tensors for regular outputs will be allocated later, only for ports that were not set by the user. + // Allocating only tensors for shapes and states. if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { ++ioIndex; continue; } + if (outputDescriptor.isStateOutput) { + // Only one buffer is required for each (state input, state output) pair, acting as an input before running + // the inference and as an output after performing it. Thus both the "state input" and "state output" + // entries shall point to the same buffer. 
+ OPENVINO_ASSERT(outputDescriptor.relatedDescriptorIndex.has_value(), + "The link between state descriptors is missing, state name: ", + outputDescriptor.nameFromCompiler); + _levelZeroOutputTensors.at(ioIndex) = get_level_zero_input(*outputDescriptor.relatedDescriptorIndex); + _userOutputTensors.at(ioIndex) = _levelZeroOutputTensors.at(ioIndex); + + ++ioIndex; + continue; + } + _levelZeroOutputTensors.at(ioIndex) = allocate_tensor(ioIndex, OUTPUT); ++ioIndex; @@ -240,42 +262,21 @@ void ZeroInferRequest::create_pipeline() { "before creating the pipeline"); for (const auto& variableState : _variableStates) { auto zeroState = std::dynamic_pointer_cast(variableState._ptr); - OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); - if (zeroState->tensor_was_updated()) { + if (zeroState->state_update_pending()) { _logger.debug("ZeroInferRequest::create_pipeline - user state tensor should be updated"); - get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); - _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); - zeroState->reset_tensor_updated_flag(); - - auto& userInput = get_user_input(zeroState->get_tensor_index())._ptr; + get_user_input(zeroState->get_tensor_index()) = zeroState->get_user_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_user_state(); + zeroState->clear_state_update_pending(); - if (zeroState->zero_tensor_should_be_updated()) { + if (zeroState->zero_state_update_pending()) { _logger.debug("ZeroInferRequest::create_pipeline - level zero state tensor should be updated"); - auto& levelZeroInput = get_level_zero_input(zeroState->get_tensor_index()); - auto& levelZeroOutput = _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()); - - if (zeroState->zero_tensor_should_be_imported()) { - // TODO in further PR - // auto hostMemSharedAllocator = zeroMemory::HostMemSharedAllocator(_initStructs, userInput); - levelZeroInput = std::make_shared(_initStructs, - _config, - userInput->get_element_type(), - userInput->get_shape(), - INPUT); - - levelZeroOutput = levelZeroInput; - - zeroState->reset_tensor_imported_flag(); - } else { - levelZeroInput = std::make_shared(_initStructs, userInput, _config); - levelZeroOutput = levelZeroInput; - } - - zeroState->reset_zero_tensor_updated_flag(); + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state(); + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state(); + zeroState->clear_zero_state_update_pending(); } } } @@ -568,34 +569,19 @@ std::shared_ptr ZeroInferRequest::allocate_tensor(const size_t index const auto& descriptor = isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index); check_network_precision(descriptor.precision); - std::shared_ptr tensor; ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape(); if (batchSize.has_value()) { allocatedTensorShape[utils::BATCH_AXIS] = *batchSize; } - if (descriptor.isStateOutput) { - // Only one buffer is required for each (state input, state output) pair, acting as an input before running the - // inference and as an output after performing it. Thus both the "state input" and "state output" entries shall - // point to the same buffer. 
- OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), - "The link between state descriptors is missing, state name: ", - descriptor.nameFromCompiler); - tensor = get_level_zero_input(*descriptor.relatedDescriptorIndex); - } else { - tensor = - std::make_shared(_initStructs, _config, descriptor.precision, allocatedTensorShape, isInput); - } + auto tensor = + std::make_shared(_initStructs, _config, descriptor.precision, allocatedTensorShape, isInput); if (isInput) { if (get_user_input(index) == nullptr) { get_user_input(index) = tensor; } - - if (descriptor.isStateInput) { - add_state(descriptor, index); - } } else if (_userOutputTensors.at(index) == nullptr) { _userOutputTensors.at(index) = tensor; } @@ -616,6 +602,12 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { } if (levelZeroTensor.at(SINGLE_TENSOR)->memory_address_changed()) { + // Memory address can change only a through tensor reshape. Tensor reallocation for a larger shape is + // allowed only when mutable command list version >= 1.0. This point should not be reached otherwise. + if (_initStructs->getMutableCommandListExtVersion() < ZE_MAKE_VERSION(1, 0)) { + OPENVINO_THROW("Reallocation of zero memory is not supported with this driver."); + } + _logger.debug("Update input graph descriptor with the new tensor"); OPENVINO_ASSERT(levelZeroTensor.at(SINGLE_TENSOR)->data(), "Empty buffer"); @@ -642,6 +634,12 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { } if (levelZeroTensor->memory_address_changed()) { + // Memory address can change only a through tensor reshape. Tensor reallocation for a larger shape is + // allowed only when mutable command list version >= 1.0. This point should not be reached otherwise. + if (_initStructs->getMutableCommandListExtVersion() < ZE_MAKE_VERSION(1, 0)) { + OPENVINO_THROW("Reallocation of zero memory is not supported with this driver."); + } + _logger.debug("Update output graph descriptor with the new tensor"); OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer"); @@ -659,80 +657,31 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { void ZeroInferRequest::update_states_if_memory_changed() { for (const auto& variableState : _variableStates) { auto zeroState = std::dynamic_pointer_cast(variableState._ptr); - OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); - if (zeroState->tensor_was_updated()) { - get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); - _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); - - zeroState->reset_tensor_updated_flag(); - - auto& userInput = get_user_input(zeroState->get_tensor_index())._ptr; - auto& levelZeroInput = get_level_zero_input(zeroState->get_tensor_index()); - auto& levelZeroOutput = _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()); - - if (zeroState->zero_tensor_should_be_updated()) { - if (zeroState->zero_tensor_should_be_imported()) { - // TODO in further PR - // auto hostMemSharedAllocator = zeroMemory::HostMemSharedAllocator(_initStructs, userInput); - levelZeroInput = std::make_shared(_initStructs, - _config, - userInput->get_element_type(), - userInput->get_shape(), - INPUT); - - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - 
levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - zeroState->reset_tensor_imported_flag(); - } else { - levelZeroInput = std::make_shared(_initStructs, zeroState->get_state(), _config); - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - } - - zeroState->reset_zero_tensor_updated_flag(); - } else if (levelZeroInput) { - if (!levelZeroInput->can_be_reused()) { - levelZeroInput = std::make_shared( - _initStructs, - _config, - _metadata.inputs.at(zeroState->get_tensor_index()).precision, - _metadata.inputs.at(zeroState->get_tensor_index()).shapeFromCompiler.get_max_shape(), - OUTPUT); - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroOutput->data(), - levelZeroOutput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - levelZeroOutput->data(), - levelZeroOutput->get_byte_size()); - - zeroState->reset_zero_tensor_updated_flag(); - } else { - _logger.debug("ZeroInferRequest::update_states_if_memory_changed - reusing the zero memory since " - "it is not shared with the user"); - } + if (zeroState->state_update_pending()) { + get_user_input(zeroState->get_tensor_index()) = zeroState->get_user_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_user_state(); + zeroState->clear_state_update_pending(); + + // State's tensor was previously updated. This change needs to be reflected in the inference request since + // states tensors are not visible inside the pipeline. + // Update input and output arguments that correspond to the state only if command lists are supported. + // Push/pull methods would later perform memory copies otherwise. + if (_initStructs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0) && + zeroState->zero_state_update_pending()) { + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state(); + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state(); + zeroState->clear_zero_state_update_pending(); + + _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, + get_level_zero_input(zeroState->get_tensor_index())->data(), + get_level_zero_input(zeroState->get_tensor_index())->get_byte_size()); + + _pipeline->update_graph_arguments( + _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(), + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size()); } } } @@ -760,11 +709,8 @@ void ZeroInferRequest::infer_async() { _pipelineIsCreated = true; _dynamicBatchValueChanged = false; // Reset reallocation flag } else { - if (_initStructs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { - update_pipeline_if_memory_changed(); - update_states_if_memory_changed(); - } - // If command list updates are not supported, fallback to copying tensors every time. 
+ update_pipeline_if_memory_changed(); + update_states_if_memory_changed(); } } @@ -986,11 +932,10 @@ void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIn _variableStates.push_back(std::make_shared(_initStructs, descriptor.nameFromCompiler, - get_user_input(tensorIndex), + get_level_zero_input(tensorIndex), tensorIndex, descriptor.relatedDescriptorIndex.value(), - _config, - _externalMemoryStandardAllocationSupported)); + _config)); } std::shared_ptr& ZeroInferRequest::get_level_zero_input(size_t index, size_t tensorNo) const { diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp index 61ce4e3d24a6aa..bd186bbf54d336 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp @@ -6,62 +6,84 @@ #include "intel_npu/config/options.hpp" #include "intel_npu/utils/utils.hpp" +#include "intel_npu/utils/zero/zero_host_tensor.hpp" #include "intel_npu/utils/zero/zero_remote_tensor.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_tensor.hpp" namespace intel_npu { ZeroVariableState::ZeroVariableState(const std::shared_ptr& init_structs, const std::string& name, - const ov::SoPtr& tensor, + const std::shared_ptr& zero_tensor, size_t tensor_index, size_t related_tensor_index, - const Config& config, - bool external_memory_standard_allocation_supported) + const Config& config) : ov::IVariableState(name), _init_structs(init_structs), _tensor_index(tensor_index), _related_tensor_index(related_tensor_index), - _external_memory_standard_allocation_supported(external_memory_standard_allocation_supported), + _zero_state(zero_tensor), _config(config), _logger("ZeroVariableState", _config.get()) { - m_state = tensor; + m_state = _zero_state; } void ZeroVariableState::set_state(const ov::SoPtr& new_state) { - m_state = new_state; - _tensor_updated = true; + if (m_state._ptr == new_state._ptr) { + // set_tensor called with the same tensor object; no action needed + _logger.debug("ZeroVariableState::set_state - got the same state, do nothing"); + return; + } - if (_init_structs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { - if (!is_remote_tensor(new_state._ptr)) { - if (zeroUtils::memory_was_allocated_in_the_same_l0_context(_init_structs->getContext(), - new_state->data())) { - _logger.debug("ZeroVariableState::set_state - tensor was created in the same L0 context"); + m_state = new_state; + _is_state_updated = true; + + try { + _logger.debug("ZeroVariableState::set_state - create zero tensor"); + // Try to use the user tensor directly if its underlying data is already allocated in the same Level Zero + // context. + _zero_state = std::make_shared(_init_structs, m_state, _config); + _is_zero_state_update_needed = true; + } catch (const ZeroTensorException&) { + // Check if the current Level Zero tensor was previously shared with the user. If so, it cannot be reused; + // allocate a new tensor to back up the user tensor (which cannot be imported or used directly). 
+ if (_zero_state == nullptr || !_zero_state->can_be_reused()) { + _logger.debug("ZeroVariableState::set_state - allocate locally L0 tensor"); + _zero_state = std::make_shared(_init_structs, + _config, + m_state->get_element_type(), + m_state->get_shape(), + false); + _is_zero_state_update_needed = true; + } else { + _logger.debug("ZeroVariableState::set_state - reusing the level zero tensor since it is not shared " + "with the user"); + } + } +} - _zero_tensor_updated = true; - } else if (_external_memory_standard_allocation_supported && - utils::memory_and_size_aligned_to_standard_page_size(new_state->data(), - new_state->get_byte_size())) { - _logger.debug("ZeroVariableState::set_state - tensor will be imported"); +ov::SoPtr ZeroVariableState::get_state() const { + auto zero_tensor = std::dynamic_pointer_cast(m_state._ptr); + if (zero_tensor != nullptr) { + zero_tensor->prevent_reuse(); + } - _tensor_should_be_imported = true; - _zero_tensor_updated = true; - } + return m_state; +} - return; - } +ov::SoPtr ZeroVariableState::get_user_state() const { + return m_state; +} - _zero_tensor_updated = true; - } +std::shared_ptr ZeroVariableState::get_zero_state() const { + return _zero_state; } void ZeroVariableState::reset() { - auto remoteTensor = std::dynamic_pointer_cast(m_state._ptr); - - void* userBuffer = !remoteTensor ? m_state->data() : remoteTensor->get_original_memory(); + auto remote_tensor = std::dynamic_pointer_cast(m_state._ptr); - std::memset(userBuffer, 0, m_state->get_byte_size()); + void* user_buffer = !remote_tensor ? m_state->data() : remote_tensor->get_original_memory(); + std::memset(user_buffer, 0, m_state->get_byte_size()); } size_t ZeroVariableState::get_tensor_index() const { @@ -72,28 +94,20 @@ size_t ZeroVariableState::get_related_tensor_index() const { return _related_tensor_index; } -bool ZeroVariableState::tensor_was_updated() const { - return _tensor_updated; -} - -void ZeroVariableState::reset_tensor_updated_flag() { - _tensor_updated = false; -} - -bool ZeroVariableState::zero_tensor_should_be_updated() const { - return _zero_tensor_updated; +bool ZeroVariableState::state_update_pending() const { + return _is_state_updated; } -void ZeroVariableState::reset_zero_tensor_updated_flag() { - _zero_tensor_updated = false; +void ZeroVariableState::clear_state_update_pending() { + _is_state_updated = false; } -bool ZeroVariableState::zero_tensor_should_be_imported() const { - return _tensor_should_be_imported; +bool ZeroVariableState::zero_state_update_pending() const { + return _is_zero_state_update_needed; } -void ZeroVariableState::reset_tensor_imported_flag() { - _tensor_should_be_imported = false; +void ZeroVariableState::clear_zero_state_update_pending() { + _is_zero_state_update_needed = false; } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp index 4d0e0d9b7e1255..9c068a5cb80593 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp @@ -6,7 +6,6 @@ #include "intel_npu/common/icompiled_model.hpp" #include "intel_npu/common/igraph.hpp" -#include "intel_npu/common/variable_state.hpp" #include "intel_npu/network_metadata.hpp" #include "openvino/runtime/iinfer_request.hpp" #include "openvino/runtime/iplugin.hpp" @@ -148,8 +147,6 @@ class SyncInferRequest : public ov::IInferRequest { */ 
virtual void check_network_precision(const ov::element::Type_t precision) const = 0; - virtual void add_state(const IODescriptor& descriptor, const size_t tensorIndex) const; - bool is_batched_input(size_t idx) const; ov::SoPtr& get_user_input(size_t index) const; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp deleted file mode 100644 index 0987f2b44bbb04..00000000000000 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/itensor.hpp" -#include "openvino/runtime/ivariable_state.hpp" - -namespace intel_npu { - -class VariableState final : public ov::IVariableState { -public: - explicit VariableState(const std::string& name, const ov::SoPtr& tensor) : ov::IVariableState(name) { - m_state = tensor; - } - - virtual void set_state(const ov::SoPtr& newState) override { - if (newState->get_byte_size() != m_state->get_byte_size()) { - OPENVINO_THROW("Byte size mismatch"); - } - - std::memcpy(m_state->data(), newState->data(), newState->get_byte_size()); - } - - virtual void reset() override { - std::memset(m_state->data(), 0, m_state->get_byte_size()); - } - - ~VariableState() override = default; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index baa32e6128898d..d3eed4e7357005 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -323,11 +323,6 @@ void SyncInferRequest::check_tensors() const { } } -void SyncInferRequest::add_state(const IODescriptor& descriptor, const size_t tensorIndex) const { - _variableStates.push_back( - std::make_shared(descriptor.nameFromCompiler, get_user_input(tensorIndex))); -} - bool SyncInferRequest::is_batched_input(size_t idx) const { return _userInputTensors.at(idx).size() > 1; } diff --git a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp similarity index 71% rename from src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp rename to src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp index d0b16f447946ff..5613d6aea4fa36 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/zero_tensor/zero_tensor_run.hpp" +#include "internal/backend/zero_tensor_tests.hpp" #include "common/npu_test_env_cfg.hpp" #include "common/utils.hpp" @@ -13,8 +13,8 @@ using namespace ov::test::behavior; const std::vector configsInferRequestRunTests = {{}}; -INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, - ZeroTensorRunTests, +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, + ZeroTensorTests, ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), ::testing::ValuesIn(configsInferRequestRunTests)), - ZeroTensorRunTests::getTestCaseName); + ZeroTensorTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp 
b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp similarity index 90% rename from src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp rename to src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp index 6c01a9b9ef7766..79e1f0b349c382 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp @@ -43,8 +43,8 @@ using ::testing::HasSubstr; namespace ov { namespace test { namespace behavior { -class ZeroTensorRunTests : public ov::test::behavior::OVPluginTestBase, - public testing::WithParamInterface { +class ZeroTensorTests : public ov::test::behavior::OVPluginTestBase, + public testing::WithParamInterface { protected: std::shared_ptr core = utils::PluginCache::get().core(); ov::AnyMap configuration; @@ -91,7 +91,7 @@ class ZeroTensorRunTests : public ov::test::behavior::OVPluginTestBase, } }; -TEST_P(ZeroTensorRunTests, AllocateDeleteAllocateZeroTensor) { +TEST_P(ZeroTensorTests, AllocateDeleteAllocateZeroTensor) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -110,7 +110,7 @@ TEST_P(ZeroTensorRunTests, AllocateDeleteAllocateZeroTensor) { ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), address)); } -TEST_P(ZeroTensorRunTests, CheckSetSmallerShape) { +TEST_P(ZeroTensorTests, CheckSetSmallerShape) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 20, 20, 20}; @@ -134,7 +134,7 @@ TEST_P(ZeroTensorRunTests, CheckSetSmallerShape) { zero_tensor->data())); } -TEST_P(ZeroTensorRunTests, CheckSetBiggerShape) { +TEST_P(ZeroTensorTests, CheckSetBiggerShape) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 20, 20, 20}; @@ -148,16 +148,20 @@ TEST_P(ZeroTensorRunTests, CheckSetBiggerShape) { auto new_shape_size = ov::shape_size(new_shape); // set_shape() will force tensor reallocation for a larger shape. The new data pointer must also be a valid level // zero address. 
- zero_tensor->set_shape(new_shape); - EXPECT_EQ(new_shape, zero_tensor->get_shape()); - EXPECT_EQ(new_shape_size, zero_tensor->get_size()); - EXPECT_EQ(new_shape_size * sizeof(ov::element::f32), zero_tensor->get_byte_size()); - ASSERT_TRUE(zero_tensor->memory_address_changed()); - ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), - zero_tensor->data())); + if (init_struct->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { + zero_tensor->set_shape(new_shape); + EXPECT_EQ(new_shape, zero_tensor->get_shape()); + EXPECT_EQ(new_shape_size, zero_tensor->get_size()); + EXPECT_EQ(new_shape_size * sizeof(ov::element::f32), zero_tensor->get_byte_size()); + ASSERT_TRUE(zero_tensor->memory_address_changed()); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_tensor->data())); + } else { + ASSERT_THROW(zero_tensor->set_shape(new_shape), ov::Exception); + } } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensorScalar) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensorScalar) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -171,7 +175,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensorScalar) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousHostTensor1Dimension) { +TEST_P(ZeroTensorTests, CheckIsContinuousHostTensor1Dimension) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -189,7 +193,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousHostTensor1Dimension) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor2Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor2Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -212,7 +216,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor2Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), false); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor3Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor3Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -238,7 +242,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor3Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor4Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor4Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, @@ -279,7 +283,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor4Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CopyDefaultTensorExpectedThrow) { +TEST_P(ZeroTensorTests, CopyDefaultTensorExpectedThrow) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -293,7 +297,7 @@ TEST_P(ZeroTensorRunTests, CopyDefaultTensorExpectedThrow) { ::operator delete(data); } -TEST_P(ZeroTensorRunTests, CopyZeroTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyZeroTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -314,7 +318,7 @@ TEST_P(ZeroTensorRunTests, CopyZeroTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyHostTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyHostTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = 
std::make_shared<::intel_npu::ZeroEngineBackend>(); @@ -337,7 +341,7 @@ TEST_P(ZeroTensorRunTests, CopyHostTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyRemoteTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyRemoteTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); @@ -360,7 +364,7 @@ TEST_P(ZeroTensorRunTests, CopyRemoteTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyRemoteTensorFromAnotherContextThrow) { +TEST_P(ZeroTensorTests, CopyRemoteTensorFromAnotherContextThrow) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); diff --git a/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp new file mode 100644 index 00000000000000..fa7d6ab85adfc5 --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "internal/backend/zero_variable_state_tests.hpp" + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/npu_private_properties.hpp" + +using namespace ov::test::behavior; + +const std::vector configsInferRequestRunTests = {{}}; + +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, + ZeroVariableStateTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + ZeroVariableStateTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp new file mode 100644 index 00000000000000..90c1b1de8e8ecd --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp @@ -0,0 +1,327 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" +#include "intel_npu/common/npu.hpp" +#include "intel_npu/config/config.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/utils/zero/zero_host_tensor.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" +#include "intel_npu/utils/zero/zero_remote_tensor.hpp" +#include "intel_npu/utils/zero/zero_utils.hpp" +#include "openvino/core/any.hpp" +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include "remote_context.hpp" +#include "shared_test_classes/base/ov_behavior_test_utils.hpp" +#include "zero_backend.hpp" +#include "zero_tensor.hpp" +#include "zero_variable_state.hpp" + +using CompilationParams = std::tuple; + +using ::testing::AllOf; +using ::testing::HasSubstr; + +namespace ov { +namespace test { +namespace behavior { +class ZeroVariableStateTests : public ov::test::behavior::OVPluginTestBase, + public testing::WithParamInterface { +protected: + 
std::shared_ptr core = utils::PluginCache::get().core(); + ov::AnyMap configuration; + std::shared_ptr<::intel_npu::ZeroInitStructsHolder> init_struct; + std::shared_ptr<::intel_npu::OptionsDesc> options = std::make_shared<::intel_npu::OptionsDesc>(); + ::intel_npu::Config npu_config = ::intel_npu::Config(options); + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice; + ov::AnyMap configuration; + std::tie(targetDevice, configuration) = obj.param; + std::replace(targetDevice.begin(), targetDevice.end(), ':', '_'); + targetDevice = ov::test::utils::getTestsPlatformFromEnvironmentOr(ov::test::utils::DEVICE_NPU); + + std::ostringstream result; + result << "targetDevice=" << targetDevice << "_"; + result << "targetPlatform=" << ov::test::utils::getTestsPlatformFromEnvironmentOr(targetDevice) << "_"; + if (!configuration.empty()) { + for (auto& configItem : configuration) { + result << "configItem=" << configItem.first << "_"; + configItem.second.print(result); + } + } + + return result.str(); + } + + void SetUp() override { + std::tie(target_device, configuration) = this->GetParam(); + + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OVPluginTestBase::SetUp(); + + init_struct = ::intel_npu::ZeroInitStructsHolder::getInstance(); + } + + void TearDown() override { + if (!configuration.empty()) { + utils::PluginCache::get().reset(); + } + + APIBaseTest::TearDown(); + } +}; + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndCheckAgainstZeroTensorState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + EXPECT_EQ(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_tensor->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithZeroTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor0 = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor0, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + auto zero_tensor1 = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, 
shape, true); + zero_state->set_state(zero_tensor1); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor0->data()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_tensor1->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_tensor1->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_tensor1->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_tensor1->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_tensor1->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithNormalTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_user_state()->data())); + + // shape size is unaligned to standard page size, expect to fail + auto data = static_cast(::operator new(ov::shape_size(shape) * sizeof(ov::element::f32))); + auto tensor = make_tensor(ov::element::f32, shape, data); + zero_state->set_state(tensor); + ASSERT_FALSE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_FALSE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_EQ(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), tensor->get_element_type()); + + EXPECT_NE(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + 
OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithNormalTensorAfterGetState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + // shape size is unaligned to standard page size, expect to fail + auto data = static_cast(::operator new(ov::shape_size(shape) * sizeof(ov::element::f32))); + auto tensor = make_tensor(ov::element::f32, shape, data); + zero_state->set_state(tensor); + ASSERT_FALSE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), tensor->get_element_type()); + + EXPECT_NE(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithHostTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); + auto zero_context = std::make_shared<::intel_npu::RemoteContextImpl>(engine_backend); + auto shape = Shape{1, 2, 2, 2}; + + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + + // shape size is unaligned to standard page size, expect to fail + auto host_tensor = + std::make_shared<::intel_npu::ZeroHostTensor>(zero_context, init_struct, ov::element::f32, shape); + zero_state->set_state(host_tensor); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + 
EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), host_tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), host_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), host_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), host_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), host_tensor->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithRemoteTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); + auto zero_context = std::make_shared<::intel_npu::RemoteContextImpl>(engine_backend); + auto shape = Shape{1, 2, 2, 2}; + + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + + // shape size is unaligned to standard page size, expect to fail + auto remote_tensor = + std::make_shared<::intel_npu::ZeroRemoteTensor>(zero_context, init_struct, ov::element::f32, shape); + zero_state->set_state(remote_tensor); + auto zero_remote_tensor = std::dynamic_pointer_cast<::intel_npu::ZeroRemoteTensor>(zero_state->get_state()._ptr); + + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_remote_tensor->get_original_memory())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_remote_tensor->get_original_memory(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_remote_tensor->get_original_memory(), remote_tensor->get_original_memory()); + EXPECT_EQ(zero_state->get_state()->get_shape(), remote_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), remote_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), remote_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), remote_tensor->get_element_type()); + + EXPECT_EQ(zero_remote_tensor->get_original_memory(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + 
OV_ASSERT_NO_THROW(zero_state->reset()); +} + +} // namespace behavior +} // namespace test +} // namespace ov
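The tests above exercise the new flag-based protocol of ZeroVariableState directly. As an illustration of the intended consumption pattern (mirroring what ZeroInferRequest::update_states_if_memory_changed does in this patch), a minimal sketch follows. It is not part of the patch: it assumes only the ZeroVariableState interface declared above, and the sync_state function and its rebind callback are illustrative placeholders, not real plugin symbols.

#include <functional>
#include <memory>

#include "zero_variable_state.hpp"

// Sketch: propagate a pending state update into whatever mechanism the caller
// uses to rebind graph arguments (e.g. Pipeline::update_graph_arguments).
void sync_state(const std::shared_ptr<intel_npu::ZeroVariableState>& state,
                const std::function<void(size_t /*tensor index*/, void* /*data*/, size_t /*byte size*/)>& rebind) {
    if (!state->state_update_pending()) {
        return;  // the user has not replaced the state tensor since the last sync
    }
    state->clear_state_update_pending();

    if (state->zero_state_update_pending()) {
        // The Level Zero backing tensor was recreated as well: the state input and
        // its related state output must both point at the new buffer.
        const auto zero_tensor = state->get_zero_state();
        rebind(state->get_tensor_index(), zero_tensor->data(), zero_tensor->get_byte_size());
        rebind(state->get_related_tensor_index(), zero_tensor->data(), zero_tensor->get_byte_size());
        state->clear_zero_state_update_pending();
    }
    // Otherwise the existing Level Zero tensor remains valid and the push/pull
    // copy path will move the user data before and after inference.
}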