diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index b09caee5bca2ba..19121c70cc9a46 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -55,7 +55,7 @@ class ZeroInferRequest final : public SyncInferRequest { const bool isInput, const std::optional batchSize = std::nullopt) const; - void add_state(const IODescriptor& descriptor, size_t tensorIndex) const override; + void add_state(const IODescriptor& descriptor, size_t tensorIndex) const; void update_pipeline_if_memory_changed(); void update_states_if_memory_changed(); diff --git a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp index c667d24a288d30..4804385f2389ba 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp @@ -8,6 +8,7 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/ivariable_state.hpp" +#include "zero_tensor.hpp" namespace intel_npu { @@ -20,57 +21,66 @@ class ZeroVariableState final : public ov::IVariableState { public: explicit ZeroVariableState(const std::shared_ptr& init_structs, const std::string& name, - const ov::SoPtr& tensor, + const std::shared_ptr& zero_tensor, size_t tensor_index, size_t related_tensor_index, - const Config& config, - bool external_memory_standard_allocation_supported); + const Config& config); void set_state(const ov::SoPtr& new_state) override; void reset() override; + ov::SoPtr get_state() const override; + /** - * @brief Get input tensor index used internally for the state + * @brief Get user state to not change the state of the tensor through get_state() */ - size_t get_tensor_index() const; + ov::SoPtr get_user_state() const; /** - * @brief Get output tensor index used internally for the state - * @details The related tensors are defined by state input, state output pairs. + * @brief Get internal level zero tensor. It can be different than the user tensor in case the user set a tensor + * that cannot be imported. Used by the InferenceRequest to update the arguments of the pipeline. */ - size_t get_related_tensor_index() const; + std::shared_ptr get_zero_state() const; /** - * @brief Get acknowledgment if the tensor was updated + * @brief Get input tensor index used internally for the state */ - bool tensor_was_updated() const; + size_t get_tensor_index() const; /** - * @brief Reset tensor updated flag + * @brief Get output tensor index used internally for the state + * @details The related tensors are defined by state input, state output pairs. */ - void reset_tensor_updated_flag(); + size_t get_related_tensor_index() const; /** - * @brief Get acknowledgment if the zero tensor was updated - * @details In case the memory was allocated in the same level zero context update the zero tensor + * @brief Get acknowledgment if state was updated + * @details Used to check if the state's internal user tensor was updated. Actions might need to be taken by the + * InferenceRequest in that case. This flag can be cleared using clear_state_update_pending(). An update to the user + * tensor might not trigger an update of the level zero tensor as well. zero_state_update_pending() should be used + * to check if the level zero tensor was also updated. 
*/ - bool zero_tensor_should_be_updated() const; + bool state_update_pending() const; /** - * @brief Reset zero tensor updated flag + * @brief Reset state updated flag + * @details Must be used to reset the flag exposed through state_update_pending() */ - void reset_zero_tensor_updated_flag(); + void clear_state_update_pending(); /** - * @brief Get acknowledgment if the zero tensor can be imported + * @brief Get acknowledgment if the zero state was updated + * @details Used to signal that the state's internal zero tensor was also updated. Actions might need to be taken by + * the InferenceRequest in that case. This flag can be cleared using clear_zero_state_update_pending(). */ - bool zero_tensor_should_be_imported() const; + bool zero_state_update_pending() const; /** - * @brief Reset zero tensor imported flag + * @brief Reset zero state updated flag + * @details Must be used to reset the flag exposed through zero_state_update_pending() */ - void reset_tensor_imported_flag(); + void clear_zero_state_update_pending(); ~ZeroVariableState() override = default; @@ -79,11 +89,10 @@ class ZeroVariableState final : public ov::IVariableState { size_t _tensor_index; size_t _related_tensor_index; - bool _tensor_updated = false; - bool _zero_tensor_updated = false; - bool _tensor_should_be_imported = false; + std::shared_ptr _zero_state; - bool _external_memory_standard_allocation_supported = false; + bool _is_state_updated = false; + bool _is_zero_state_update_needed = false; const Config _config; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index e5082fc661e823..3d2df2bbf05212 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -120,6 +120,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& for (const IODescriptor& inputDescriptor : _metadata.inputs) { check_level_zero_attributes_match(inputDescriptor, _graphInputDescriptors.at(ioIndex)); + // Tensors for regular inputs will be allocated later, only for ports that were not set by the user. + // Allocating only tensors for shapes and states. if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { ++ioIndex; continue; @@ -127,6 +129,10 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& get_level_zero_input(ioIndex) = allocate_tensor(ioIndex, INPUT); + if (inputDescriptor.isStateInput) { + add_state(inputDescriptor, ioIndex); + } + ++ioIndex; } @@ -134,11 +140,27 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& for (const IODescriptor& outputDescriptor : _metadata.outputs) { check_level_zero_attributes_match(outputDescriptor, _graphOutputDescriptors.at(ioIndex)); + // Tensors for regular outputs will be allocated later, only for ports that were not set by the user. + // Allocating only tensors for shapes and states. if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { ++ioIndex; continue; } + if (outputDescriptor.isStateOutput) { + // Only one buffer is required for each (state input, state output) pair, acting as an input before running + // the inference and as an output after performing it. Thus both the "state input" and "state output" + // entries shall point to the same buffer. 
+ OPENVINO_ASSERT(outputDescriptor.relatedDescriptorIndex.has_value(), + "The link between state descriptors is missing, state name: ", + outputDescriptor.nameFromCompiler); + _levelZeroOutputTensors.at(ioIndex) = get_level_zero_input(*outputDescriptor.relatedDescriptorIndex); + _userOutputTensors.at(ioIndex) = _levelZeroOutputTensors.at(ioIndex); + + ++ioIndex; + continue; + } + _levelZeroOutputTensors.at(ioIndex) = allocate_tensor(ioIndex, OUTPUT); ++ioIndex; @@ -240,42 +262,21 @@ void ZeroInferRequest::create_pipeline() { "before creating the pipeline"); for (const auto& variableState : _variableStates) { auto zeroState = std::dynamic_pointer_cast(variableState._ptr); - OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); - if (zeroState->tensor_was_updated()) { + if (zeroState->state_update_pending()) { _logger.debug("ZeroInferRequest::create_pipeline - user state tensor should be updated"); - get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); - _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); - zeroState->reset_tensor_updated_flag(); - - auto& userInput = get_user_input(zeroState->get_tensor_index())._ptr; + get_user_input(zeroState->get_tensor_index()) = zeroState->get_user_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_user_state(); + zeroState->clear_state_update_pending(); - if (zeroState->zero_tensor_should_be_updated()) { + if (zeroState->zero_state_update_pending()) { _logger.debug("ZeroInferRequest::create_pipeline - level zero state tensor should be updated"); - auto& levelZeroInput = get_level_zero_input(zeroState->get_tensor_index()); - auto& levelZeroOutput = _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()); - - if (zeroState->zero_tensor_should_be_imported()) { - // TODO in further PR - // auto hostMemSharedAllocator = zeroMemory::HostMemSharedAllocator(_initStructs, userInput); - levelZeroInput = std::make_shared(_initStructs, - _config, - userInput->get_element_type(), - userInput->get_shape(), - INPUT); - - levelZeroOutput = levelZeroInput; - - zeroState->reset_tensor_imported_flag(); - } else { - levelZeroInput = std::make_shared(_initStructs, userInput, _config); - levelZeroOutput = levelZeroInput; - } - - zeroState->reset_zero_tensor_updated_flag(); + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state(); + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state(); + zeroState->clear_zero_state_update_pending(); } } } @@ -568,34 +569,19 @@ std::shared_ptr ZeroInferRequest::allocate_tensor(const size_t index const auto& descriptor = isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index); check_network_precision(descriptor.precision); - std::shared_ptr tensor; ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape(); if (batchSize.has_value()) { allocatedTensorShape[utils::BATCH_AXIS] = *batchSize; } - if (descriptor.isStateOutput) { - // Only one buffer is required for each (state input, state output) pair, acting as an input before running the - // inference and as an output after performing it. Thus both the "state input" and "state output" entries shall - // point to the same buffer. 
- OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), - "The link between state descriptors is missing, state name: ", - descriptor.nameFromCompiler); - tensor = get_level_zero_input(*descriptor.relatedDescriptorIndex); - } else { - tensor = - std::make_shared(_initStructs, _config, descriptor.precision, allocatedTensorShape, isInput); - } + auto tensor = + std::make_shared(_initStructs, _config, descriptor.precision, allocatedTensorShape, isInput); if (isInput) { if (get_user_input(index) == nullptr) { get_user_input(index) = tensor; } - - if (descriptor.isStateInput) { - add_state(descriptor, index); - } } else if (_userOutputTensors.at(index) == nullptr) { _userOutputTensors.at(index) = tensor; } @@ -616,6 +602,12 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { } if (levelZeroTensor.at(SINGLE_TENSOR)->memory_address_changed()) { + // Memory address can change only a through tensor reshape. Tensor reallocation for a larger shape is + // allowed only when mutable command list version >= 1.0. This point should not be reached otherwise. + if (_initStructs->getMutableCommandListExtVersion() < ZE_MAKE_VERSION(1, 0)) { + OPENVINO_THROW("Reallocation of zero memory is not supported with this driver."); + } + _logger.debug("Update input graph descriptor with the new tensor"); OPENVINO_ASSERT(levelZeroTensor.at(SINGLE_TENSOR)->data(), "Empty buffer"); @@ -642,6 +634,12 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { } if (levelZeroTensor->memory_address_changed()) { + // Memory address can change only a through tensor reshape. Tensor reallocation for a larger shape is + // allowed only when mutable command list version >= 1.0. This point should not be reached otherwise. + if (_initStructs->getMutableCommandListExtVersion() < ZE_MAKE_VERSION(1, 0)) { + OPENVINO_THROW("Reallocation of zero memory is not supported with this driver."); + } + _logger.debug("Update output graph descriptor with the new tensor"); OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer"); @@ -659,80 +657,31 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() { void ZeroInferRequest::update_states_if_memory_changed() { for (const auto& variableState : _variableStates) { auto zeroState = std::dynamic_pointer_cast(variableState._ptr); - OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); - if (zeroState->tensor_was_updated()) { - get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); - _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); - - zeroState->reset_tensor_updated_flag(); - - auto& userInput = get_user_input(zeroState->get_tensor_index())._ptr; - auto& levelZeroInput = get_level_zero_input(zeroState->get_tensor_index()); - auto& levelZeroOutput = _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()); - - if (zeroState->zero_tensor_should_be_updated()) { - if (zeroState->zero_tensor_should_be_imported()) { - // TODO in further PR - // auto hostMemSharedAllocator = zeroMemory::HostMemSharedAllocator(_initStructs, userInput); - levelZeroInput = std::make_shared(_initStructs, - _config, - userInput->get_element_type(), - userInput->get_shape(), - INPUT); - - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - 
levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - zeroState->reset_tensor_imported_flag(); - } else { - levelZeroInput = std::make_shared(_initStructs, zeroState->get_state(), _config); - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - levelZeroInput->data(), - levelZeroInput->get_byte_size()); - } - - zeroState->reset_zero_tensor_updated_flag(); - } else if (levelZeroInput) { - if (!levelZeroInput->can_be_reused()) { - levelZeroInput = std::make_shared( - _initStructs, - _config, - _metadata.inputs.at(zeroState->get_tensor_index()).precision, - _metadata.inputs.at(zeroState->get_tensor_index()).shapeFromCompiler.get_max_shape(), - OUTPUT); - levelZeroOutput = levelZeroInput; - - _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, - levelZeroOutput->data(), - levelZeroOutput->get_byte_size()); - - _pipeline->update_graph_arguments( - _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, - levelZeroOutput->data(), - levelZeroOutput->get_byte_size()); - - zeroState->reset_zero_tensor_updated_flag(); - } else { - _logger.debug("ZeroInferRequest::update_states_if_memory_changed - reusing the zero memory since " - "it is not shared with the user"); - } + if (zeroState->state_update_pending()) { + get_user_input(zeroState->get_tensor_index()) = zeroState->get_user_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_user_state(); + zeroState->clear_state_update_pending(); + + // State's tensor was previously updated. This change needs to be reflected in the inference request since + // states tensors are not visible inside the pipeline. + // Update input and output arguments that correspond to the state only if command lists are supported. + // Push/pull methods would later perform memory copies otherwise. + if (_initStructs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0) && + zeroState->zero_state_update_pending()) { + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_zero_state(); + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_zero_state(); + zeroState->clear_zero_state_update_pending(); + + _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, + get_level_zero_input(zeroState->get_tensor_index())->data(), + get_level_zero_input(zeroState->get_tensor_index())->get_byte_size()); + + _pipeline->update_graph_arguments( + _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(), + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size()); } } } @@ -760,11 +709,8 @@ void ZeroInferRequest::infer_async() { _pipelineIsCreated = true; _dynamicBatchValueChanged = false; // Reset reallocation flag } else { - if (_initStructs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { - update_pipeline_if_memory_changed(); - update_states_if_memory_changed(); - } - // If command list updates are not supported, fallback to copying tensors every time. 
+ update_pipeline_if_memory_changed(); + update_states_if_memory_changed(); } } @@ -986,11 +932,10 @@ void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIn _variableStates.push_back(std::make_shared(_initStructs, descriptor.nameFromCompiler, - get_user_input(tensorIndex), + get_level_zero_input(tensorIndex), tensorIndex, descriptor.relatedDescriptorIndex.value(), - _config, - _externalMemoryStandardAllocationSupported)); + _config)); } std::shared_ptr& ZeroInferRequest::get_level_zero_input(size_t index, size_t tensorNo) const { diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp index 61ce4e3d24a6aa..bd186bbf54d336 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp @@ -6,62 +6,84 @@ #include "intel_npu/config/options.hpp" #include "intel_npu/utils/utils.hpp" +#include "intel_npu/utils/zero/zero_host_tensor.hpp" #include "intel_npu/utils/zero/zero_remote_tensor.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_tensor.hpp" namespace intel_npu { ZeroVariableState::ZeroVariableState(const std::shared_ptr& init_structs, const std::string& name, - const ov::SoPtr& tensor, + const std::shared_ptr& zero_tensor, size_t tensor_index, size_t related_tensor_index, - const Config& config, - bool external_memory_standard_allocation_supported) + const Config& config) : ov::IVariableState(name), _init_structs(init_structs), _tensor_index(tensor_index), _related_tensor_index(related_tensor_index), - _external_memory_standard_allocation_supported(external_memory_standard_allocation_supported), + _zero_state(zero_tensor), _config(config), _logger("ZeroVariableState", _config.get()) { - m_state = tensor; + m_state = _zero_state; } void ZeroVariableState::set_state(const ov::SoPtr& new_state) { - m_state = new_state; - _tensor_updated = true; + if (m_state._ptr == new_state._ptr) { + // set_tensor called with the same tensor object; no action needed + _logger.debug("ZeroVariableState::set_state - got the same state, do nothing"); + return; + } - if (_init_structs->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { - if (!is_remote_tensor(new_state._ptr)) { - if (zeroUtils::memory_was_allocated_in_the_same_l0_context(_init_structs->getContext(), - new_state->data())) { - _logger.debug("ZeroVariableState::set_state - tensor was created in the same L0 context"); + m_state = new_state; + _is_state_updated = true; + + try { + _logger.debug("ZeroVariableState::set_state - create zero tensor"); + // Try to use the user tensor directly if its underlying data is already allocated in the same Level Zero + // context. + _zero_state = std::make_shared(_init_structs, m_state, _config); + _is_zero_state_update_needed = true; + } catch (const ZeroTensorException&) { + // Check if the current Level Zero tensor was previously shared with the user. If so, it cannot be reused; + // allocate a new tensor to back up the user tensor (which cannot be imported or used directly). 
+ if (_zero_state == nullptr || !_zero_state->can_be_reused()) { + _logger.debug("ZeroVariableState::set_state - allocate locally L0 tensor"); + _zero_state = std::make_shared(_init_structs, + _config, + m_state->get_element_type(), + m_state->get_shape(), + false); + _is_zero_state_update_needed = true; + } else { + _logger.debug("ZeroVariableState::set_state - reusing the level zero tensor since it is not shared " + "with the user"); + } + } +} - _zero_tensor_updated = true; - } else if (_external_memory_standard_allocation_supported && - utils::memory_and_size_aligned_to_standard_page_size(new_state->data(), - new_state->get_byte_size())) { - _logger.debug("ZeroVariableState::set_state - tensor will be imported"); +ov::SoPtr ZeroVariableState::get_state() const { + auto zero_tensor = std::dynamic_pointer_cast(m_state._ptr); + if (zero_tensor != nullptr) { + zero_tensor->prevent_reuse(); + } - _tensor_should_be_imported = true; - _zero_tensor_updated = true; - } + return m_state; +} - return; - } +ov::SoPtr ZeroVariableState::get_user_state() const { + return m_state; +} - _zero_tensor_updated = true; - } +std::shared_ptr ZeroVariableState::get_zero_state() const { + return _zero_state; } void ZeroVariableState::reset() { - auto remoteTensor = std::dynamic_pointer_cast(m_state._ptr); - - void* userBuffer = !remoteTensor ? m_state->data() : remoteTensor->get_original_memory(); + auto remote_tensor = std::dynamic_pointer_cast(m_state._ptr); - std::memset(userBuffer, 0, m_state->get_byte_size()); + void* user_buffer = !remote_tensor ? m_state->data() : remote_tensor->get_original_memory(); + std::memset(user_buffer, 0, m_state->get_byte_size()); } size_t ZeroVariableState::get_tensor_index() const { @@ -72,28 +94,20 @@ size_t ZeroVariableState::get_related_tensor_index() const { return _related_tensor_index; } -bool ZeroVariableState::tensor_was_updated() const { - return _tensor_updated; -} - -void ZeroVariableState::reset_tensor_updated_flag() { - _tensor_updated = false; -} - -bool ZeroVariableState::zero_tensor_should_be_updated() const { - return _zero_tensor_updated; +bool ZeroVariableState::state_update_pending() const { + return _is_state_updated; } -void ZeroVariableState::reset_zero_tensor_updated_flag() { - _zero_tensor_updated = false; +void ZeroVariableState::clear_state_update_pending() { + _is_state_updated = false; } -bool ZeroVariableState::zero_tensor_should_be_imported() const { - return _tensor_should_be_imported; +bool ZeroVariableState::zero_state_update_pending() const { + return _is_zero_state_update_needed; } -void ZeroVariableState::reset_tensor_imported_flag() { - _tensor_should_be_imported = false; +void ZeroVariableState::clear_zero_state_update_pending() { + _is_zero_state_update_needed = false; } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp index 4d0e0d9b7e1255..9c068a5cb80593 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp @@ -6,7 +6,6 @@ #include "intel_npu/common/icompiled_model.hpp" #include "intel_npu/common/igraph.hpp" -#include "intel_npu/common/variable_state.hpp" #include "intel_npu/network_metadata.hpp" #include "openvino/runtime/iinfer_request.hpp" #include "openvino/runtime/iplugin.hpp" @@ -148,8 +147,6 @@ class SyncInferRequest : public ov::IInferRequest { */ 
virtual void check_network_precision(const ov::element::Type_t precision) const = 0; - virtual void add_state(const IODescriptor& descriptor, const size_t tensorIndex) const; - bool is_batched_input(size_t idx) const; ov::SoPtr& get_user_input(size_t index) const; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp deleted file mode 100644 index 0987f2b44bbb04..00000000000000 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/itensor.hpp" -#include "openvino/runtime/ivariable_state.hpp" - -namespace intel_npu { - -class VariableState final : public ov::IVariableState { -public: - explicit VariableState(const std::string& name, const ov::SoPtr& tensor) : ov::IVariableState(name) { - m_state = tensor; - } - - virtual void set_state(const ov::SoPtr& newState) override { - if (newState->get_byte_size() != m_state->get_byte_size()) { - OPENVINO_THROW("Byte size mismatch"); - } - - std::memcpy(m_state->data(), newState->data(), newState->get_byte_size()); - } - - virtual void reset() override { - std::memset(m_state->data(), 0, m_state->get_byte_size()); - } - - ~VariableState() override = default; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index baa32e6128898d..d3eed4e7357005 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -323,11 +323,6 @@ void SyncInferRequest::check_tensors() const { } } -void SyncInferRequest::add_state(const IODescriptor& descriptor, const size_t tensorIndex) const { - _variableStates.push_back( - std::make_shared(descriptor.nameFromCompiler, get_user_input(tensorIndex))); -} - bool SyncInferRequest::is_batched_input(size_t idx) const { return _userInputTensors.at(idx).size() > 1; } diff --git a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp similarity index 71% rename from src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp rename to src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp index d0b16f447946ff..5613d6aea4fa36 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.cpp +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/zero_tensor/zero_tensor_run.hpp" +#include "internal/backend/zero_tensor_tests.hpp" #include "common/npu_test_env_cfg.hpp" #include "common/utils.hpp" @@ -13,8 +13,8 @@ using namespace ov::test::behavior; const std::vector configsInferRequestRunTests = {{}}; -INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, - ZeroTensorRunTests, +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, + ZeroTensorTests, ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), ::testing::ValuesIn(configsInferRequestRunTests)), - ZeroTensorRunTests::getTestCaseName); + ZeroTensorTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp 
b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp similarity index 90% rename from src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp rename to src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp index 6c01a9b9ef7766..79e1f0b349c382 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/zero_tensor/zero_tensor_run.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_tensor_tests.hpp @@ -43,8 +43,8 @@ using ::testing::HasSubstr; namespace ov { namespace test { namespace behavior { -class ZeroTensorRunTests : public ov::test::behavior::OVPluginTestBase, - public testing::WithParamInterface { +class ZeroTensorTests : public ov::test::behavior::OVPluginTestBase, + public testing::WithParamInterface { protected: std::shared_ptr core = utils::PluginCache::get().core(); ov::AnyMap configuration; @@ -91,7 +91,7 @@ class ZeroTensorRunTests : public ov::test::behavior::OVPluginTestBase, } }; -TEST_P(ZeroTensorRunTests, AllocateDeleteAllocateZeroTensor) { +TEST_P(ZeroTensorTests, AllocateDeleteAllocateZeroTensor) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -110,7 +110,7 @@ TEST_P(ZeroTensorRunTests, AllocateDeleteAllocateZeroTensor) { ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), address)); } -TEST_P(ZeroTensorRunTests, CheckSetSmallerShape) { +TEST_P(ZeroTensorTests, CheckSetSmallerShape) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 20, 20, 20}; @@ -134,7 +134,7 @@ TEST_P(ZeroTensorRunTests, CheckSetSmallerShape) { zero_tensor->data())); } -TEST_P(ZeroTensorRunTests, CheckSetBiggerShape) { +TEST_P(ZeroTensorTests, CheckSetBiggerShape) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 20, 20, 20}; @@ -148,16 +148,20 @@ TEST_P(ZeroTensorRunTests, CheckSetBiggerShape) { auto new_shape_size = ov::shape_size(new_shape); // set_shape() will force tensor reallocation for a larger shape. The new data pointer must also be a valid level // zero address. 
- zero_tensor->set_shape(new_shape); - EXPECT_EQ(new_shape, zero_tensor->get_shape()); - EXPECT_EQ(new_shape_size, zero_tensor->get_size()); - EXPECT_EQ(new_shape_size * sizeof(ov::element::f32), zero_tensor->get_byte_size()); - ASSERT_TRUE(zero_tensor->memory_address_changed()); - ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), - zero_tensor->data())); + if (init_struct->getMutableCommandListExtVersion() >= ZE_MAKE_VERSION(1, 0)) { + zero_tensor->set_shape(new_shape); + EXPECT_EQ(new_shape, zero_tensor->get_shape()); + EXPECT_EQ(new_shape_size, zero_tensor->get_size()); + EXPECT_EQ(new_shape_size * sizeof(ov::element::f32), zero_tensor->get_byte_size()); + ASSERT_TRUE(zero_tensor->memory_address_changed()); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_tensor->data())); + } else { + ASSERT_THROW(zero_tensor->set_shape(new_shape), ov::Exception); + } } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensorScalar) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensorScalar) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -171,7 +175,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensorScalar) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousHostTensor1Dimension) { +TEST_P(ZeroTensorTests, CheckIsContinuousHostTensor1Dimension) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -189,7 +193,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousHostTensor1Dimension) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor2Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor2Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -212,7 +216,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor2Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), false); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor3Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor3Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = @@ -238,7 +242,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor3Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor4Dimensions) { +TEST_P(ZeroTensorTests, CheckIsContinuousZeroTensor4Dimensions) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, @@ -279,7 +283,7 @@ TEST_P(ZeroTensorRunTests, CheckIsContinuousZeroTensor4Dimensions) { EXPECT_EQ(view_tensor.is_continuous(), true); } -TEST_P(ZeroTensorRunTests, CopyDefaultTensorExpectedThrow) { +TEST_P(ZeroTensorTests, CopyDefaultTensorExpectedThrow) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -293,7 +297,7 @@ TEST_P(ZeroTensorRunTests, CopyDefaultTensorExpectedThrow) { ::operator delete(data); } -TEST_P(ZeroTensorRunTests, CopyZeroTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyZeroTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() auto shape = Shape{1, 2, 2, 2}; @@ -314,7 +318,7 @@ TEST_P(ZeroTensorRunTests, CopyZeroTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyHostTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyHostTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = 
std::make_shared<::intel_npu::ZeroEngineBackend>(); @@ -337,7 +341,7 @@ TEST_P(ZeroTensorRunTests, CopyHostTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyRemoteTensorAndKeepAlive) { +TEST_P(ZeroTensorTests, CopyRemoteTensorAndKeepAlive) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); @@ -360,7 +364,7 @@ TEST_P(ZeroTensorRunTests, CopyRemoteTensorAndKeepAlive) { ASSERT_THROW(copy_zero_tensor->set_shape({1, 20, 20, 20}), ov::Exception); } -TEST_P(ZeroTensorRunTests, CopyRemoteTensorFromAnotherContextThrow) { +TEST_P(ZeroTensorTests, CopyRemoteTensorFromAnotherContextThrow) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); diff --git a/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp new file mode 100644 index 00000000000000..fa7d6ab85adfc5 --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "internal/backend/zero_variable_state_tests.hpp" + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/npu_private_properties.hpp" + +using namespace ov::test::behavior; + +const std::vector configsInferRequestRunTests = {{}}; + +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, + ZeroVariableStateTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + ZeroVariableStateTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp new file mode 100644 index 00000000000000..90c1b1de8e8ecd --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/internal/backend/zero_variable_state_tests.hpp @@ -0,0 +1,327 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" +#include "intel_npu/common/npu.hpp" +#include "intel_npu/config/config.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/utils/zero/zero_host_tensor.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" +#include "intel_npu/utils/zero/zero_remote_tensor.hpp" +#include "intel_npu/utils/zero/zero_utils.hpp" +#include "openvino/core/any.hpp" +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include "remote_context.hpp" +#include "shared_test_classes/base/ov_behavior_test_utils.hpp" +#include "zero_backend.hpp" +#include "zero_tensor.hpp" +#include "zero_variable_state.hpp" + +using CompilationParams = std::tuple; + +using ::testing::AllOf; +using ::testing::HasSubstr; + +namespace ov { +namespace test { +namespace behavior { +class ZeroVariableStateTests : public ov::test::behavior::OVPluginTestBase, + public testing::WithParamInterface { +protected: + 
std::shared_ptr core = utils::PluginCache::get().core(); + ov::AnyMap configuration; + std::shared_ptr<::intel_npu::ZeroInitStructsHolder> init_struct; + std::shared_ptr<::intel_npu::OptionsDesc> options = std::make_shared<::intel_npu::OptionsDesc>(); + ::intel_npu::Config npu_config = ::intel_npu::Config(options); + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice; + ov::AnyMap configuration; + std::tie(targetDevice, configuration) = obj.param; + std::replace(targetDevice.begin(), targetDevice.end(), ':', '_'); + targetDevice = ov::test::utils::getTestsPlatformFromEnvironmentOr(ov::test::utils::DEVICE_NPU); + + std::ostringstream result; + result << "targetDevice=" << targetDevice << "_"; + result << "targetPlatform=" << ov::test::utils::getTestsPlatformFromEnvironmentOr(targetDevice) << "_"; + if (!configuration.empty()) { + for (auto& configItem : configuration) { + result << "configItem=" << configItem.first << "_"; + configItem.second.print(result); + } + } + + return result.str(); + } + + void SetUp() override { + std::tie(target_device, configuration) = this->GetParam(); + + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OVPluginTestBase::SetUp(); + + init_struct = ::intel_npu::ZeroInitStructsHolder::getInstance(); + } + + void TearDown() override { + if (!configuration.empty()) { + utils::PluginCache::get().reset(); + } + + APIBaseTest::TearDown(); + } +}; + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndCheckAgainstZeroTensorState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + EXPECT_EQ(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_tensor->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithZeroTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor0 = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor0, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + auto zero_tensor1 = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, 
shape, true); + zero_state->set_state(zero_tensor1); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor0->data()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_tensor1->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_tensor1->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_tensor1->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_tensor1->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_tensor1->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithNormalTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_user_state()->data())); + + // shape size is unaligned to standard page size, expect to fail + auto data = static_cast(::operator new(ov::shape_size(shape) * sizeof(ov::element::f32))); + auto tensor = make_tensor(ov::element::f32, shape, data); + zero_state->set_state(tensor); + ASSERT_FALSE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_FALSE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_EQ(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), tensor->get_element_type()); + + EXPECT_NE(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + 
OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithNormalTensorAfterGetState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + + // shape size is unaligned to standard page size, expect to fail + auto data = static_cast(::operator new(ov::shape_size(shape) * sizeof(ov::element::f32))); + auto tensor = make_tensor(ov::element::f32, shape, data); + zero_state->set_state(tensor); + ASSERT_FALSE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), tensor->get_element_type()); + + EXPECT_NE(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithHostTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); + auto zero_context = std::make_shared<::intel_npu::RemoteContextImpl>(engine_backend); + auto shape = Shape{1, 2, 2, 2}; + + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + + // shape size is unaligned to standard page size, expect to fail + auto host_tensor = + std::make_shared<::intel_npu::ZeroHostTensor>(zero_context, init_struct, ov::element::f32, shape); + zero_state->set_state(host_tensor); + ASSERT_TRUE(::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_state()->data())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + 
EXPECT_NE(zero_state->get_state()->data(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_state->get_state()->data(), host_tensor->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), host_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), host_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), host_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), host_tensor->get_element_type()); + + EXPECT_EQ(zero_state->get_state()->data(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + OV_ASSERT_NO_THROW(zero_state->reset()); +} + +TEST_P(ZeroVariableStateTests, CreateZeroStateAndUseSetStateWithRemoteTensor) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + std::shared_ptr<::intel_npu::IEngineBackend> engine_backend = std::make_shared<::intel_npu::ZeroEngineBackend>(); + auto zero_context = std::make_shared<::intel_npu::RemoteContextImpl>(engine_backend); + auto shape = Shape{1, 2, 2, 2}; + + auto zero_tensor = std::make_shared<::intel_npu::ZeroTensor>(init_struct, npu_config, element::f32, shape, true); + auto zero_state = + std::make_shared<::intel_npu::ZeroVariableState>(init_struct, "state", zero_tensor, 1, 1, npu_config); + + // shape size is unaligned to standard page size, expect to fail + auto remote_tensor = + std::make_shared<::intel_npu::ZeroRemoteTensor>(zero_context, init_struct, ov::element::f32, shape); + zero_state->set_state(remote_tensor); + auto zero_remote_tensor = std::dynamic_pointer_cast<::intel_npu::ZeroRemoteTensor>(zero_state->get_state()._ptr); + + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_remote_tensor->get_original_memory())); + ASSERT_TRUE( + ::intel_npu::zeroUtils::memory_was_allocated_in_the_same_l0_context(init_struct->getContext(), + zero_state->get_zero_state()->data())); + + ASSERT_TRUE(zero_state->state_update_pending()); + ASSERT_TRUE(zero_state->zero_state_update_pending()); + + EXPECT_NE(zero_remote_tensor->get_original_memory(), zero_tensor->data()); + EXPECT_NE(zero_state->get_zero_state()->data(), zero_tensor->data()); + + EXPECT_EQ(zero_remote_tensor->get_original_memory(), remote_tensor->get_original_memory()); + EXPECT_EQ(zero_state->get_state()->get_shape(), remote_tensor->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), remote_tensor->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), remote_tensor->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), remote_tensor->get_element_type()); + + EXPECT_EQ(zero_remote_tensor->get_original_memory(), zero_state->get_zero_state()->data()); + EXPECT_EQ(zero_state->get_state()->get_shape(), zero_state->get_zero_state()->get_shape()); + EXPECT_EQ(zero_state->get_state()->get_size(), zero_state->get_zero_state()->get_size()); + EXPECT_EQ(zero_state->get_state()->get_byte_size(), zero_state->get_zero_state()->get_byte_size()); + EXPECT_EQ(zero_state->get_state()->get_element_type(), zero_state->get_zero_state()->get_element_type()); + + 
OV_ASSERT_NO_THROW(zero_state->reset()); +} + +} // namespace behavior +} // namespace test +} // namespace ov
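The tests above exercise the new flag-based protocol of ZeroVariableState directly. As an illustration of the intended consumption pattern (mirroring what ZeroInferRequest::update_states_if_memory_changed does in this patch), a minimal sketch follows. It is not part of the patch: it assumes only the ZeroVariableState interface declared above, and the sync_state function and its rebind callback are illustrative placeholders, not real plugin symbols.

#include <functional>
#include <memory>

#include "zero_variable_state.hpp"

// Sketch: propagate a pending state update into whatever mechanism the caller
// uses to rebind graph arguments (e.g. Pipeline::update_graph_arguments).
void sync_state(const std::shared_ptr<intel_npu::ZeroVariableState>& state,
                const std::function<void(size_t /*tensor index*/, void* /*data*/, size_t /*byte size*/)>& rebind) {
    if (!state->state_update_pending()) {
        return;  // the user has not replaced the state tensor since the last sync
    }
    state->clear_state_update_pending();

    if (state->zero_state_update_pending()) {
        // The Level Zero backing tensor was recreated as well: the state input and
        // its related state output must both point at the new buffer.
        const auto zero_tensor = state->get_zero_state();
        rebind(state->get_tensor_index(), zero_tensor->data(), zero_tensor->get_byte_size());
        rebind(state->get_related_tensor_index(), zero_tensor->data(), zero_tensor->get_byte_size());
        state->clear_zero_state_update_pending();
    }
    // Otherwise the existing Level Zero tensor remains valid and the push/pull
    // copy path will move the user data before and after inference.
}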