Commit 80cac84
Fix issue and revert states type
Signed-off-by: Bogdan Pereanu <[email protected]>
1 parent 2ecb526 commit 80cac84

File tree

4 files changed: 51 additions, 53 deletions
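In short, this commit reverts `_variableStates` to the generic `ov::SoPtr<ov::IVariableState>` type owned by the common `SyncInferRequest`, moves `query_state()`/`initialize_states()` into that base class, and has the Level Zero backend downcast each entry to `ZeroVariableState` only where backend-specific bookkeeping is needed. The sketch below is a minimal standalone illustration of that ownership/downcast pattern; it is not OpenVINO code, it uses plain std::shared_ptr instead of ov::SoPtr, and all class and member names in it are simplified stand-ins.

// Minimal standalone sketch (not OpenVINO code): simplified stand-ins that
// mirror the ov::IVariableState / ZeroVariableState relationship.
#include <cassert>
#include <iostream>
#include <memory>
#include <vector>

// Stand-in for the generic state interface the base request owns.
struct IVariableStateLike {
    virtual ~IVariableStateLike() = default;
    virtual void reset() = 0;
};

// Stand-in for the backend-specific state with extra bookkeeping.
struct ZeroVariableStateLike : IVariableStateLike {
    bool updated = false;
    void reset() override { updated = false; }
    bool tensor_was_updated() const { return updated; }
};

// The base request stores states only through the generic interface,
// so query_state()/initialize_states() need no backend knowledge.
struct BaseRequest {
    std::vector<std::shared_ptr<IVariableStateLike>> states;

    void initialize_states() {
        for (auto& state : states) state->reset();
    }
    std::vector<std::shared_ptr<IVariableStateLike>> query_state() const {
        return states;  // no per-element upcast loop needed anymore
    }
};

// The backend request downcasts only where the derived API is required,
// mirroring the dynamic_pointer_cast + OPENVINO_ASSERT added in this commit.
struct ZeroLikeRequest : BaseRequest {
    void update_states_if_memory_changed() {
        for (auto& state : states) {
            auto zeroState = std::dynamic_pointer_cast<ZeroVariableStateLike>(state);
            assert(zeroState && "State is not compatible with this backend");
            if (zeroState->tensor_was_updated()) {
                // ...refresh cached backend tensors here...
                zeroState->updated = false;
            }
        }
    }
};

int main() {
    ZeroLikeRequest request;
    request.states.push_back(std::make_shared<ZeroVariableStateLike>());
    request.update_states_if_memory_changed();
    request.initialize_states();
    std::cout << "states: " << request.query_state().size() << "\n";
    return 0;
}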

src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp

Lines changed: 3 additions & 11 deletions
@@ -35,14 +35,6 @@ class ZeroInferRequest final : public SyncInferRequest {
 
     void get_result() override;
 
-    std::vector<ov::SoPtr<ov::IVariableState>> query_state() const override;
-
-    /**
-     * @brief Initializes the tensor values corresponding to the state variables.
-     * @details The inital values are usually all 0s.
-     */
-    void initialize_states() override;
-
 private:
     std::vector<ov::ProfilingInfo> get_profiling_info() const override;
 
@@ -64,7 +56,9 @@ class ZeroInferRequest final : public SyncInferRequest {
                                                 const bool isInput,
                                                 const std::optional<std::size_t> batchSize = std::nullopt) const;
 
-    void add_state(const IODescriptor& descriptor, size_t tensorIndex) const;
+    void add_state(const IODescriptor& descriptor,
+                   size_t tensorIndex,
+                   const std::shared_ptr<ZeroTensor>& zeroTensor) const;
 
     void update_pipeline_if_memory_changed();
     void update_states_if_memory_changed();
@@ -82,8 +76,6 @@ class ZeroInferRequest final : public SyncInferRequest {
     mutable std::vector<std::vector<std::shared_ptr<ZeroTensor>>> _levelZeroInputTensors;
     mutable std::vector<std::shared_ptr<ZeroTensor>> _levelZeroOutputTensors;
 
-    mutable std::vector<ov::SoPtr<ZeroVariableState>> _variableStates;
-
     std::unique_ptr<Pipeline> _pipeline;
 
     bool _pipelineIsCreated = false;

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

Lines changed: 33 additions & 41 deletions
@@ -238,19 +238,22 @@ void ZeroInferRequest::create_pipeline() {
     _logger.debug("ZeroInferRequest::create_pipeline - set new tensors and reset variable state flag if memory updated "
                   "before creating the pipeline");
     for (const auto& variableState : _variableStates) {
-        if (variableState->tensor_was_updated()) {
+        auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);
+        OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");
+
+        if (zeroState->tensor_was_updated()) {
             _logger.debug("ZeroInferRequest::create_pipeline - user state tensor should be updated");
 
-            get_user_input(variableState->get_tensor_index()) = variableState->get_state();
-            _userOutputTensors.at(variableState->get_related_tensor_index()) = variableState->get_state();
-            variableState->reset_tensor_updated_flag();
+            get_user_input(zeroState->get_tensor_index()) = zeroState->get_state();
+            _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state();
+            zeroState->reset_tensor_updated_flag();
 
-            if (variableState->zero_tensor_should_be_updated()) {
+            if (zeroState->zero_tensor_should_be_updated()) {
                 _logger.debug("ZeroInferRequest::create_pipeline - level zero state tensor should be updated");
 
-                get_level_zero_input(variableState->get_tensor_index()) = variableState->get_zero_state();
-                _levelZeroOutputTensors.at(variableState->get_related_tensor_index()) = variableState->get_zero_state();
-                variableState->reset_zero_tensor_updated_flag();
+                get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state();
+                _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state();
+                zeroState->reset_zero_tensor_updated_flag();
             }
         }
     }
@@ -569,7 +572,7 @@ std::shared_ptr<ZeroTensor> ZeroInferRequest::allocate_tensor(const size_t index
         }
 
         if (descriptor.isStateInput) {
-            add_state(descriptor, index);
+            add_state(descriptor, index, tensor);
         }
     } else if (_userOutputTensors.at(index) == nullptr) {
         _userOutputTensors.at(index) = tensor;
@@ -633,25 +636,27 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
 
 void ZeroInferRequest::update_states_if_memory_changed() {
     for (const auto& variableState : _variableStates) {
-        if (variableState->tensor_was_updated()) {
-            get_user_input(variableState->get_tensor_index()) = variableState->get_state();
-            _userOutputTensors.at(variableState->get_related_tensor_index()) = variableState->get_state();
-            variableState->reset_tensor_updated_flag();
+        auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);
+        OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");
 
-            if (variableState->zero_tensor_should_be_updated()) {
-                get_level_zero_input(variableState->get_tensor_index()) = variableState->get_zero_state();
-                _levelZeroOutputTensors.at(variableState->get_related_tensor_index()) = variableState->get_zero_state();
-                variableState->reset_zero_tensor_updated_flag();
+        if (zeroState->tensor_was_updated()) {
+            get_user_input(zeroState->get_tensor_index()) = zeroState->get_state();
+            _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state();
+            zeroState->reset_tensor_updated_flag();
 
-                _pipeline->update_graph_arguments(
-                    _graphInputDescriptors.at(variableState->get_tensor_index()).idx,
-                    get_level_zero_input(variableState->get_tensor_index())->data(),
-                    get_level_zero_input(variableState->get_tensor_index())->get_byte_size());
+            if (zeroState->zero_tensor_should_be_updated()) {
+                get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state();
+                _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state();
+                zeroState->reset_zero_tensor_updated_flag();
+
+                _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
+                                                  get_level_zero_input(zeroState->get_tensor_index())->data(),
+                                                  get_level_zero_input(zeroState->get_tensor_index())->get_byte_size());
 
                 _pipeline->update_graph_arguments(
-                    _graphOutputDescriptors.at(variableState->get_related_tensor_index()).idx,
-                    _levelZeroOutputTensors.at(variableState->get_related_tensor_index())->data(),
-                    _levelZeroOutputTensors.at(variableState->get_related_tensor_index())->get_byte_size());
+                    _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
+                    _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(),
+                    _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size());
             }
         }
     }
@@ -842,21 +847,6 @@ void ZeroInferRequest::get_result() {
     _logger.debug("InferRequest::get_result finished");
 }
 
-void ZeroInferRequest::initialize_states() {
-    for (const auto& variableState : _variableStates) {
-        variableState->reset();
-    }
-}
-
-std::vector<ov::SoPtr<ov::IVariableState>> ZeroInferRequest::query_state() const {
-    std::vector<ov::SoPtr<ov::IVariableState>> result;
-    result.reserve(_variableStates.size());
-    for (const auto& state : _variableStates) {
-        result.push_back(state);  // Implicit upcast from SoPtr<ZeroVariableState> to SoPtr<IVariableState>
-    }
-    return result;
-}
-
 void ZeroInferRequest::check_network_precision(const ov::element::Type_t precision) const {
     switch (precision) {
     case ov::element::Type_t::f32:
@@ -913,14 +903,16 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
     return _pipeline->get_profiling_info();
 }
 
-void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIndex) const {
+void ZeroInferRequest::add_state(const IODescriptor& descriptor,
+                                 size_t tensorIndex,
+                                 const std::shared_ptr<ZeroTensor>& zeroTensor) const {
     OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(),
                     "The link between state descriptors is missing, state name: ",
                     descriptor.nameFromCompiler);
 
    _variableStates.push_back(std::make_shared<ZeroVariableState>(_initStructs,
                                                                   descriptor.nameFromCompiler,
-                                                                  get_level_zero_input(tensorIndex),
+                                                                  zeroTensor,
                                                                   tensorIndex,
                                                                   descriptor.relatedDescriptorIndex.value(),
                                                                   _config));

src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp

Lines changed: 5 additions & 1 deletion
@@ -84,11 +84,13 @@ class SyncInferRequest : public ov::IInferRequest {
      */
     virtual void get_result() = 0;
 
+    std::vector<ov::SoPtr<ov::IVariableState>> query_state() const override;
+
     /**
      * @brief Initializes the tensor values corresponding to the state variables.
      * @details The inital values are usually all 0s.
      */
-    virtual void initialize_states() = 0;
+    void initialize_states();
 
 protected:
     /**

@@ -162,6 +164,8 @@
     mutable std::vector<std::vector<ov::SoPtr<ov::ITensor>>> _userInputTensors;
     mutable std::vector<ov::SoPtr<ov::ITensor>> _userOutputTensors;
 
+    mutable std::vector<ov::SoPtr<ov::IVariableState>> _variableStates;
+
     /**
      * @see ov::ISyncInferRequest
      */

src/plugins/intel_npu/src/common/src/sync_infer_request.cpp

Lines changed: 10 additions & 0 deletions
@@ -102,6 +102,16 @@ const std::shared_ptr<const ov::ICompiledModel>& SyncInferRequest::get_compiled_
     return _compiledModel;
 }
 
+void SyncInferRequest::initialize_states() {
+    for (const ov::SoPtr<ov::IVariableState>& variableState : _variableStates) {
+        variableState->reset();
+    }
+}
+
+std::vector<ov::SoPtr<ov::IVariableState>> SyncInferRequest::query_state() const {
+    return _variableStates;
+}
+
 ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
     auto foundPort = find_port(port);
     OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port);
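From the application side, this plumbing is what answers the standard stateful-inference calls. Below is a hedged usage sketch against the public OpenVINO C++ API; the model path and the "NPU" device string are placeholders, and the snippet assumes a stateful model that compiles for that device.

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Placeholder model path and device; any stateful model compiled for NPU applies.
    auto compiled = core.compile_model("stateful_model.xml", "NPU");
    ov::InferRequest request = compiled.create_infer_request();

    request.infer();  // one inference step of the stateful model

    // query_state()/reset() are served by SyncInferRequest, backed by the
    // _variableStates vector this commit moves into the base class.
    for (ov::VariableState& state : request.query_state()) {
        std::cout << state.get_name() << std::endl;
        state.reset();  // re-initializes the state tensor (usually to zeros)
    }
    return 0;
}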
