From 63f723ea09272b57168c6ef5ff3c86f01b930692 Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev <nikolay.shchegolev@intel.com>
Date: Mon, 15 Sep 2025 17:13:28 +0400
Subject: [PATCH 1/3] [CPU] Weightless cache support

---
 samples/cpp/benchmark_app/main.cpp            |   5 +
 ...k_subgraphs_to_keep_in_mixed_precision.cpp |   1 -
 src/core/CMakeLists.txt                       |   4 +-
 .../rt_info/weightless_caching_attributes.hpp |   2 +
 .../openvino/xml_util/xml_serialize_util.hpp  |   4 +-
 .../op/util/weightless_caching_attributes.cpp |   9 +-
 src/core/src/xml_util/xml_serialize_util.cpp  |   6 +-
 .../xml_util/xml_deserialize_util.hpp         |   3 +
 src/inference/src/dev/core_impl.cpp           |  22 +-
 src/plugins/intel_cpu/CMakeLists.txt          |   4 +-
 src/plugins/intel_cpu/src/compiled_model.cpp  |  10 +-
 src/plugins/intel_cpu/src/config.cpp          |   9 +-
 src/plugins/intel_cpu/src/config.h            |   2 +
 src/plugins/intel_cpu/src/plugin.cpp          | 208 ++++++++++++++----
 src/plugins/intel_cpu/src/plugin.h            |   2 +-
 .../deserializer.cpp}                         | 142 +++++++++---
 .../utils/graph_serializer/deserializer.hpp   | 119 ++++++++++
 .../src/utils/graph_serializer/serializer.cpp | 181 +++++++++++++++
 .../src/utils/graph_serializer/serializer.hpp |  39 ++++
 src/plugins/intel_cpu/src/utils/serialize.hpp |  64 ------
 .../ov_executable_network/properties.cpp      |   2 +-
 .../custom/behavior/ov_plugin/properties.cpp  |   5 +
 .../behavior/compiled_model/model_cache.cpp   |  26 +++
 .../skip_tests_config.cpp                     |   2 +
 .../intel_cpu/tests/unit/CMakeLists.txt       |   1 +
 .../tests/unit/vectorized/CMakeLists.txt      |   1 +
 .../behavior/compiled_model/model_cache.hpp   |  56 +++++
 .../behavior/compiled_model/model_cache.cpp   | 190 ++++++++++++++++
 28 files changed, 954 insertions(+), 165 deletions(-)
 rename src/plugins/intel_cpu/src/utils/{serialize.cpp => graph_serializer/deserializer.cpp} (58%)
 create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
 create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp
 create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp
 delete mode 100644 src/plugins/intel_cpu/src/utils/serialize.hpp
 create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp
 create mode 100644 src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp
 create mode 100644 src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp

diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp
index 3ae50e13d7433e..928ed621d4c736 100644
--- a/samples/cpp/benchmark_app/main.cpp
+++ b/samples/cpp/benchmark_app/main.cpp
@@ -606,6 +606,11 @@ int main(int argc, char* argv[]) {
             if (is_virtual_device(device)) {
                 device_nstreams.erase(device);
             }
+
+            if (!FLAGS_cache_dir.empty()) {
+                // Choose between better model compilation time and cache file size.
+                device_config[ov::cache_mode.name()] = ov::CacheMode::OPTIMIZE_SPEED;
+            }
         }
         auto result = std::find_if(config.begin(), config.end(), [&](const std::pair<std::string, ov::AnyMap>& item) {
             return device_name.find(item.first) == 0;
diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
index 27c137a1731fe1..f0a091b65219f7 100644
--- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
+++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
@@ -50,7 +50,6 @@
 #include "openvino/pass/pattern/op/or.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
-#include "transformations/convert_precision.hpp"
 #include "transformations/fp16_compression/mark_floatpoint_range.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
 #include "transformations/utils/utils.hpp"
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 1dadebdf3e66d1..3443b20c5a77e0 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -122,9 +122,7 @@ endif()
 # some sources are located in openvino_core, while headers are in openvino_transformations
 file(GLOB_RECURSE smart_reshape_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/smart_reshape/*.cpp)
 file(GLOB_RECURSE rt_info_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/rt_info/*.cpp)
-set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_precision.cpp"
-                            "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
-                            "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/init_node_info.cpp"
+set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
                             "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/serialize.cpp"
                             "${CMAKE_CURRENT_SOURCE_DIR}/src/op/type_relaxed.cpp"
                             "${CMAKE_CURRENT_SOURCE_DIR}/src/preprocess/preprocess_steps_impl.cpp"
diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
index f5c7d3446dbcb6..7e5ea18bbcb853 100644
--- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
+++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
@@ -35,6 +35,8 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute {
 
     bool is_copyable() const override;
 
+    bool visit_attributes(AttributeVisitor& visitor) override;
+
     size_t original_size;
     size_t bin_offset;
     ov::element::Type original_dtype;
diff --git a/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp b/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp
index c847b61bd3a96d..ff72ad9b35d56b 100644
--- a/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp
+++ b/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp
@@ -76,9 +76,7 @@ class OPENVINO_API XmlSerializer : public ov::AttributeVisitor {
     virtual void append_rt_info(pugi::xml_node& node, ov::RTMap& attributes);
     virtual bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute);
     virtual bool append_node_attributes(ov::Node& node);
-    virtual util::ConstantWriter& get_constant_write_handler() const {
-        return m_constant_node_write_handler;
-    }
+    virtual util::ConstantWriter& get_constant_write_handler();
 
 public:
     XmlSerializer(pugi::xml_node& data,
diff --git a/src/core/src/op/util/weightless_caching_attributes.cpp b/src/core/src/op/util/weightless_caching_attributes.cpp
index 4e595e475226d1..6deb130709d5c7 100644
--- a/src/core/src/op/util/weightless_caching_attributes.cpp
+++ b/src/core/src/op/util/weightless_caching_attributes.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,6 +11,13 @@ bool ov::WeightlessCacheAttribute::is_copyable() const {
     return false;
 }
 
+bool ov::WeightlessCacheAttribute::visit_attributes(AttributeVisitor& visitor) {
+    visitor.on_attribute("original_dtype", original_dtype);  // each field is visited under its own member name
+    visitor.on_attribute("bin_offset", bin_offset);
+    visitor.on_attribute("original_size", original_size);
+    return true;  // unconditionally reports success
+}
+
 OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr<ov::Node>& from,
                                                  const std::shared_ptr<ov::Node>& to) {
     const auto& rt_info = from->get_rt_info();
diff --git a/src/core/src/xml_util/xml_serialize_util.cpp b/src/core/src/xml_util/xml_serialize_util.cpp
index 96a8c1772344a0..97af95cf099a2d 100644
--- a/src/core/src/xml_util/xml_serialize_util.cpp
+++ b/src/core/src/xml_util/xml_serialize_util.cpp
@@ -1061,9 +1061,9 @@ bool XmlSerializer::append_node_attributes(ov::Node& node) {
     return node.visit_attributes(*this);
 }
 
-// util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
-//     return m_constant_node_write_handler.get();
-// }
+util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
+    return m_constant_node_write_handler.get();  // NOTE(review): presumably a std::reference_wrapper - confirm
+}
 
 std::string get_ir_precision_name(const element::Type& precision) {
     switch (precision) {
diff --git a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
index d7d9ea02b3dd28..653810b0ca3eaf 100644
--- a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
+++ b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
@@ -53,6 +53,9 @@ class XmlDeserializer : public ov::AttributeVisitor {
     virtual void set_constant_num_buffer(ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>& adapter);
 
     const pugi::xml_node& get_node() const;
+    const std::shared_ptr<ov::AlignedBuffer>& get_weights() const {
+        return m_weights;  // read-only accessor for the m_weights member
+    }
 
 private:
     struct IoMap {
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index db916280eabfaf..56e918714f7610 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -30,6 +30,7 @@
 #include "openvino/runtime/threading/executor_manager.hpp"
 #include "openvino/util/common_util.hpp"
 #include "openvino/util/file_util.hpp"
+#include "openvino/util/log.hpp"
 #include "openvino/util/shared_object.hpp"
 #include "openvino/util/variant_visitor.hpp"
 #include "openvino/util/xml_parse_utils.hpp"
@@ -851,6 +852,16 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
         const auto compiled_config = create_compile_config(plugin, parsed._config);
         cache_content.blobId = ModelCache::compute_hash(model, cache_content.modelPath, compiled_config);
         cache_content.model = model;
+
+        const auto& cache_mode_it = config.find(cache_mode.name());
+        if (cache_mode_it != config.end() && cache_mode_it->second == CacheMode::OPTIMIZE_SIZE) {
+            const auto& rt_info = model->get_rt_info();
+            auto weights_path = rt_info.find("__weights_path");
+            if (weights_path != rt_info.end()) {
+                parsed._config[ov::weights_path.name()] = weights_path->second;
+            }
+        }
+
         const auto lock = cacheGuard.get_hash_lock(cache_content.blobId);
         res = load_model_from_cache(cache_content, plugin, parsed._config, {}, [&]() {
             return compile_model_and_cache(plugin, model, parsed._config, {}, cache_content);
@@ -1594,10 +1605,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
                     update_config[ov::hint::model.name()] = cacheContent.model;
                 }
 
-                if (util::contains(plugin.get_property(ov::supported_properties), ov::hint::model) &&
-                    cacheContent.model) {
-                    update_config[ov::hint::model.name()] = cacheContent.model;
-                }
                 if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
                     util::Path weights_path;
 
@@ -1606,7 +1613,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
                         weights_path = path_hint->second.as<std::string>();
                     } else if (weights_path = extract_weight_path(header.get_runtime_info()); weights_path.empty()) {
                         weights_path = cacheContent.modelPath;
-                        weights_path.replace_extension(".bin");
                     }
                     weights_path.replace_extension(".bin");
 
@@ -1638,9 +1644,11 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
         // throw;
     }
 
-    // fallback scenario
-    if (!compiled_model)
+    // Fallback scenario
+    if (!compiled_model) {
+        OPENVINO_WARN("Could not load model from cache.");
         compiled_model = compile_model_lambda();
+    }
 
     return compiled_model;
 }
diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt
index f0bcf4b7287285..d067bb58ed08ff 100644
--- a/src/plugins/intel_cpu/CMakeLists.txt
+++ b/src/plugins/intel_cpu/CMakeLists.txt
@@ -261,7 +261,8 @@ ov_mark_target_as_cc(${TARGET_NAME})
 
 target_link_libraries(${TARGET_NAME} PRIVATE dnnl
                                              openvino::shape_inference
-                                             openvino::snippets)
+                                             openvino::snippets
+                                             openvino_xml_util)
 
 target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
 if (ENABLE_MLAS_FOR_CPU)
@@ -397,6 +398,7 @@ if(BUILD_SHARED_LIBS)
             $<TARGET_PROPERTY:openvino::shape_inference,INTERFACE_INCLUDE_DIRECTORIES>
             $<TARGET_PROPERTY:openvino::snippets,INTERFACE_INCLUDE_DIRECTORIES>
             $<TARGET_PROPERTY:openvino::reference,INTERFACE_INCLUDE_DIRECTORIES>
+            $<TARGET_PROPERTY:openvino::xml_util,INTERFACE_INCLUDE_DIRECTORIES>
         PUBLIC
             ${CMAKE_CURRENT_SOURCE_DIR}/src
             $<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>)
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 2940d6ad41382a..82188856c853d7 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -36,8 +36,8 @@
 #include "sub_memory_manager.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/general_utils.h"
+#include "utils/graph_serializer/serializer.hpp"
 #include "utils/memory_stats_dump.hpp"
-#include "utils/serialize.hpp"
 
 #if defined(OV_CPU_WITH_ACL)
 #    include <arm_compute/runtime/IScheduler.h>
@@ -303,8 +303,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             RO_property(ov::key_cache_precision.name()),
             RO_property(ov::value_cache_precision.name()),
             RO_property(ov::key_cache_group_size.name()),
-            RO_property(ov::value_cache_group_size.name()),
-        };
+            RO_property(ov::value_cache_group_size.name())};
 
         return ro_properties;
     }
@@ -400,11 +399,14 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     if (name == ov::value_cache_group_size) {
         return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
     }
+    if (name == ov::weights_path) {
+        return static_cast<decltype(ov::weights_path)::value_type>("");
+    }
     OPENVINO_THROW("Unsupported property: ", name);
 }
 
 void CompiledModel::export_model(std::ostream& modelStream) const {
-    ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
+    ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt, m_cfg.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE);
     serializer << m_model;
 }
 
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 4de2b7f7133cad..dce25d5f24f080 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -446,7 +446,14 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             } catch (ov::Exception&) {
                 OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name());
             }
-        } else if (key == ov::internal::caching_with_mmap.name()) {
+        } else if (key == ov::cache_mode.name()) {
+            try {
+                m_cache_mode = val.as<ov::CacheMode>();
+            } catch (...) {
+                OPENVINO_THROW("Wrong value for property key ", ov::cache_mode.name());
+            }
+        } else if (key == ov::hint::model.name() || key == ov::internal::caching_with_mmap.name() ||
+                   key == ov::weights_path.name()) {
         } else if (key == ov::intel_cpu::enable_sage_attn.name()) {
             try {
                 enableSageAttn = val.as<bool>();
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index 3d3337ea7e136f..68ea781a204c34 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -138,6 +138,8 @@ struct Config {
     std::function<std::string(const std::string&)> cacheEncrypt;
     std::function<std::string(const std::string&)> cacheDecrypt;
 
+    ov::CacheMode m_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
+
 #ifdef CPU_DEBUG_CAPS
     DebugCapsConfig debugCaps;
     void applyDebugCapsProperties();
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 57cb1737b3a3ab..226c198851e29a 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -49,14 +49,16 @@
 #include "openvino/runtime/threading/cpu_message.hpp"
 #include "openvino/runtime/threading/executor_manager.hpp"
 #include "openvino/runtime/threading/istreams_executor.hpp"
+#include "openvino/util/xml_parse_utils.hpp"
 #include "sigstack_manager.h"
 #include "transformations/transformation_pipeline.h"
 #include "transformations/utils/utils.hpp"
 #include "utils/codec_xor.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/denormals.hpp"
+#include "utils/graph_serializer/deserializer.hpp"
+#include "utils/graph_serializer/serializer.hpp"
 #include "utils/precision_support.h"
-#include "utils/serialize.hpp"
 #include "weights_cache.hpp"
 #include "xbyak/xbyak_util.h"
 
@@ -511,6 +513,10 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
         return decltype(ov::value_cache_group_size)::value_type(engConfig.valueCacheGroupSize);
     }
 
+    if (name == ov::weights_path) {
+        return decltype(ov::weights_path)::value_type(std::string(""));
+    }
+
     return get_ro_property(name, options);
 }
 
@@ -521,6 +527,9 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const
     auto RW_property = [](const std::string& propertyName) {
         return ov::PropertyName(propertyName, ov::PropertyMutability::RW);
     };
+    auto WO_property = [](const std::string& propertyName) {
+        return ov::PropertyName(propertyName, ov::PropertyMutability::WO);
+    };
 
     if (name == ov::supported_properties) {
         std::vector<ov::PropertyName> roProperties{
@@ -535,36 +544,37 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const
             RO_property(ov::device::architecture.name()),
         };
         // the whole config is RW before model is loaded.
-        std::vector<ov::PropertyName> rwProperties{
-            RW_property(ov::num_streams.name()),
-            RW_property(ov::inference_num_threads.name()),
-            RW_property(ov::enable_profiling.name()),
-            RW_property(ov::hint::inference_precision.name()),
-            RW_property(ov::hint::performance_mode.name()),
-            RW_property(ov::hint::execution_mode.name()),
-            RW_property(ov::hint::num_requests.name()),
-            RW_property(ov::hint::enable_cpu_pinning.name()),
-            RW_property(ov::hint::enable_cpu_reservation.name()),
-            RW_property(ov::hint::scheduling_core_type.name()),
-            RW_property(ov::hint::model_distribution_policy.name()),
-            RW_property(ov::hint::enable_hyper_threading.name()),
-            RW_property(ov::device::id.name()),
-            RW_property(ov::intel_cpu::denormals_optimization.name()),
-            RW_property(ov::log::level.name()),
-            RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
-            RW_property(ov::intel_cpu::enable_tensor_parallel.name()),
-            RW_property(ov::hint::dynamic_quantization_group_size.name()),
-            RW_property(ov::hint::kv_cache_precision.name()),
-            RW_property(ov::key_cache_precision.name()),
-            RW_property(ov::value_cache_precision.name()),
-            RW_property(ov::key_cache_group_size.name()),
-            RW_property(ov::value_cache_group_size.name()),
-        };
+        std::vector<ov::PropertyName> rwProperties{RW_property(ov::num_streams.name()),
+                                                   RW_property(ov::inference_num_threads.name()),
+                                                   RW_property(ov::enable_profiling.name()),
+                                                   RW_property(ov::hint::inference_precision.name()),
+                                                   RW_property(ov::hint::performance_mode.name()),
+                                                   RW_property(ov::hint::execution_mode.name()),
+                                                   RW_property(ov::hint::num_requests.name()),
+                                                   RW_property(ov::hint::enable_cpu_pinning.name()),
+                                                   RW_property(ov::hint::enable_cpu_reservation.name()),
+                                                   RW_property(ov::hint::scheduling_core_type.name()),
+                                                   RW_property(ov::hint::model_distribution_policy.name()),
+                                                   RW_property(ov::hint::enable_hyper_threading.name()),
+                                                   RW_property(ov::device::id.name()),
+                                                   RW_property(ov::intel_cpu::denormals_optimization.name()),
+                                                   RW_property(ov::log::level.name()),
+                                                   RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
+                                                   RW_property(ov::intel_cpu::enable_tensor_parallel.name()),
+                                                   RW_property(ov::hint::dynamic_quantization_group_size.name()),
+                                                   RW_property(ov::hint::kv_cache_precision.name()),
+                                                   RW_property(ov::key_cache_precision.name()),
+                                                   RW_property(ov::value_cache_precision.name()),
+                                                   RW_property(ov::key_cache_group_size.name()),
+                                                   RW_property(ov::value_cache_group_size.name())};
+
+        std::vector<ov::PropertyName> wo_properties{WO_property(ov::weights_path.name())};
 
         std::vector<ov::PropertyName> supportedProperties;
-        supportedProperties.reserve(roProperties.size() + rwProperties.size());
+        supportedProperties.reserve(roProperties.size() + rwProperties.size() + wo_properties.size());
         supportedProperties.insert(supportedProperties.end(), roProperties.begin(), roProperties.end());
         supportedProperties.insert(supportedProperties.end(), rwProperties.begin(), rwProperties.end());
+        supportedProperties.insert(supportedProperties.end(), wo_properties.begin(), wo_properties.end());
 
         return decltype(ov::supported_properties)::value_type(std::move(supportedProperties));
     }
@@ -691,24 +701,94 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
     return res;
 }
 
-std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
-    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");
+// Resolves the original weights file path for weightless caching: the ov::weights_path value is
+// returned only when ov::cache_mode is present and equals OPTIMIZE_SIZE, otherwise an empty string.
+std::string get_origin_weights_path(const ov::AnyMap& config) {
+    const auto mode_it = config.find(ov::cache_mode.name());
+    // No cache mode, or a mode other than OPTIMIZE_SIZE: there is no original weights path to report.
+    if (mode_it == config.end() || mode_it->second.as<ov::CacheMode>() != ov::CacheMode::OPTIMIZE_SIZE) {
+        return {};
+    }
+
+    const auto path_it = config.find(ov::weights_path.name());
+    if (path_it == config.end()) {
+        return {};
+    }
+
+    // Both conditions hold: hand back the configured path as a plain string.
+    return path_it->second.as<std::string>();
+}
+
+bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) {  // decrypt is updated in place
+    bool decrypt_from_string = false;  // result: true only when encryption callbacks are present in config
 
-    CacheDecrypt decrypt{codec_xor};
-    bool decript_from_string = false;
     if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) {
         const auto& encryption_callbacks = it->second.as<EncryptionCallbacks>();
         decrypt.m_decrypt_str = encryption_callbacks.decrypt;
-        decript_from_string = true;
+        decrypt_from_string = true;  // the string decryptor taken from config is now set on decrypt
     }
 
+    return decrypt_from_string;
+}
+
+std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
+    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");
+    // Stream-based import: the decryptor and the optional original weights path both come from `config`.
+    CacheDecrypt decrypt{codec_xor};
+    const bool decrypt_from_string = get_cache_decrypt_fn(config, decrypt);
+    const auto origin_weights_path = get_origin_weights_path(config);
+    // The callback rebuilds the ov::Model from the cached XML (`model`) and the weight buffers handed to it.
     ModelDeserializer deserializer(
         model_stream,
-        [this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
-            return get_core()->read_model(model, weights);
+        [this](
+            const std::shared_ptr<ov::AlignedBuffer>& model,
+            const std::shared_ptr<ov::AlignedBuffer>& weights,
+            const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
+            if (origin_weights == nullptr) {
+                return get_core()->read_model(model, weights);
+            } else {
+                // Weightless mode: parse the cached XML here so the original weights buffer can be passed along.
+                pugi::xml_document xml_doc;
+                const auto root = [&] {
+                    auto res =
+                        xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
+                    OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
+                    return xml_doc.document_element();
+                }();
+                const auto opsets = [] {
+                    std::unordered_map<std::string, ov::OpSet> available;
+                    for (const auto& [name, mk_opset] : ov::get_available_opsets()) {
+                        available[name] = mk_opset();
+                    }
+                    return available;
+                }();
+                const auto version = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(root, "version", 0));
+
+                auto op_extensions_by_type =
+                    [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
+                    std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> exts;
+                    std::vector<ov::Extension::Ptr> m_extensions;
+                    OV_CREATE_EXTENSION(m_extensions);
+                    for (const auto& ext : m_extensions) {
+                        if (auto base_ext = std::dynamic_pointer_cast<ov::BaseOpExtension>(ext))
+                            exts.insert({base_ext->get_type_info(), base_ext});
+                    }
+                    return exts;
+                }();
+                std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>> variables;
+                // Use the weights buffer passed to the callback unless it is null or empty; else the original one.
+                const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights;
+                XmlDeserializer visitor(root, w, origin_weights, opsets, op_extensions_by_type, variables, version);
+                std::shared_ptr<ov::Model> ov_model;
+                visitor.on_attribute("net", ov_model);
+                OPENVINO_ASSERT(ov_model != nullptr, "Failed to deserialize the cached model in weightless mode");
+                ov_model->get_rt_info()["version"] = static_cast<int64_t>(version);
+                return ov_model;
+            }
         },
         decrypt,
-        decript_from_string);
+        decrypt_from_string,
+        origin_weights_path);
 
     return deserialize_model(deserializer, config);
 }
@@ -718,12 +798,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(const ov::Tensor& model
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");
 
     CacheDecrypt decrypt{codec_xor};
-    bool decript_from_string = false;
-    if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) {
-        const auto& encryption_callbacks = it->second.as<EncryptionCallbacks>();
-        decrypt.m_decrypt_str = encryption_callbacks.decrypt;
-        decript_from_string = true;
-    }
+    auto decrypt_from_string = get_cache_decrypt_fn(config, decrypt);
+    const auto origin_weights_path = get_origin_weights_path(config);
 
     std::shared_ptr<ov::AlignedBuffer> model_buffer =
         std::make_shared<ov::SharedBuffer<ov::Tensor>>(reinterpret_cast<char*>(model_tensor.data()),
@@ -732,11 +808,55 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(const ov::Tensor& model
 
     ModelDeserializer deserializer(
         model_buffer,
-        [this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
-            return get_core()->read_model(model, weights);
+        [this](
+            const std::shared_ptr<ov::AlignedBuffer>& model,
+            const std::shared_ptr<ov::AlignedBuffer>& weights,
+            const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
+            if (origin_weights == nullptr) {
+                return get_core()->read_model(model, weights);
+            } else {
+                // Weightless mode: parse the cached XML here so the original weights buffer can be passed along.
+                pugi::xml_document xml_doc;
+                const auto root = [&] {
+                    auto res =
+                        xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
+                    OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
+                    return xml_doc.document_element();
+                }();
+                const auto opsets = [] {
+                    std::unordered_map<std::string, ov::OpSet> available;
+                    for (const auto& [name, mk_opset] : ov::get_available_opsets()) {
+                        available[name] = mk_opset();
+                    }
+                    return available;
+                }();
+                const auto version = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(root, "version", 0));
+
+                auto op_extensions_by_type =
+                    [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
+                    std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> exts;
+                    std::vector<ov::Extension::Ptr> m_extensions;
+                    OV_CREATE_EXTENSION(m_extensions);
+                    for (const auto& ext : m_extensions) {
+                        if (auto base_ext = std::dynamic_pointer_cast<ov::BaseOpExtension>(ext))
+                            exts.insert({base_ext->get_type_info(), base_ext});
+                    }
+                    return exts;
+                }();
+                std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>> variables;
+                // Use the weights buffer passed to the callback unless it is null or empty; else the original one.
+                const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights;
+                XmlDeserializer visitor(root, w, origin_weights, opsets, op_extensions_by_type, variables, version);
+                std::shared_ptr<ov::Model> ov_model;
+                visitor.on_attribute("net", ov_model);
+                OPENVINO_ASSERT(ov_model != nullptr, "Failed to deserialize the cached model in weightless mode");
+                ov_model->get_rt_info()["version"] = static_cast<int64_t>(version);
+                return ov_model;
+            }
         },
         decrypt,
-        decript_from_string);
+        decrypt_from_string,
+        origin_weights_path);
 
     return deserialize_model(deserializer, config);
 }
diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h
index bf4a6fb87254af..e138460500428a 100644
--- a/src/plugins/intel_cpu/src/plugin.h
+++ b/src/plugins/intel_cpu/src/plugin.h
@@ -18,7 +18,7 @@
 #include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/so_ptr.hpp"
 #include "openvino/runtime/threading/cpu_message.hpp"
-#include "utils/serialize.hpp"
+#include "utils/graph_serializer/deserializer.hpp"
 
 namespace ov::intel_cpu {
 
diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
similarity index 58%
rename from src/plugins/intel_cpu/src/utils/serialize.cpp
rename to src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
index aa60590beeaf9e..8a45c184b129bc 100644
--- a/src/plugins/intel_cpu/src/utils/serialize.cpp
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
@@ -2,60 +2,45 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "serialize.hpp"
+#include "deserializer.hpp"
 
 #include <cstddef>
 #include <cstring>
 #include <functional>
 #include <istream>
 #include <memory>
-#include <ostream>
 #include <string>
 #include <utility>
 #include <variant>
 
+#include "openvino/core/any.hpp"
 #include "openvino/core/except.hpp"
+#include "openvino/core/memory_util.hpp"
 #include "openvino/core/model.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/core/shape.hpp"
 #include "openvino/core/type/element_type.hpp"
+#include "openvino/op/convert.hpp"
 #include "openvino/pass/serialize.hpp"
 #include "openvino/runtime/aligned_buffer.hpp"
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/runtime/tensor.hpp"
+#include "openvino/util/mmap_object.hpp"
+#include "openvino/util/xml_parse_utils.hpp"
+#include "openvino/xml_util/xml_deserialize_util.hpp"
 #include "utils/codec_xor.hpp"
 
 namespace ov::intel_cpu {
 
-////////// ModelSerializer //////////
-
-ModelSerializer::ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn)
-    : ov::pass::StreamSerialize(
-          ostream,
-          [](std::ostream& stream) {
-              pugi::xml_document xml_doc;
-              pugi::xml_node root = xml_doc.append_child("cnndata");
-              root.append_child("outputs");
-              xml_doc.save(stream);
-          },
-          encrypt_fn) {};
-
-void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {
-    run_on_model(std::const_pointer_cast<ov::Model>(model->clone()));
-}
-
-bool ModelSerializer::use_absolute_offset() {
-    return false;
-}
-
-////////// ModelDeserializer //////////
-
 ModelDeserializer::ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_buffer,
                                      ModelBuilder fn,
                                      const CacheDecrypt& decrypt_fn,
-                                     bool decript_from_string)
+                                     bool decript_from_string,
+                                     std::string origin_weights_path)
     : m_model(model_buffer),
       m_model_builder(std::move(fn)),
-      m_decript_from_string(decript_from_string) {
+      m_decript_from_string(decript_from_string),
+      m_origin_weights_path(std::move(origin_weights_path)) {
     if (m_decript_from_string) {
         m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
     } else {
@@ -66,10 +51,12 @@ ModelDeserializer::ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_b
 ModelDeserializer::ModelDeserializer(std::istream& model_stream,
                                      ModelBuilder fn,
                                      const CacheDecrypt& decrypt_fn,
-                                     bool decript_from_string)
+                                     bool decript_from_string,
+                                     std::string origin_weights_path)
     : m_model(model_stream),
       m_model_builder(std::move(fn)),
-      m_decript_from_string(decript_from_string) {
+      m_decript_from_string(decript_from_string),
+      m_origin_weights_path(std::move(origin_weights_path)) {
     if (m_decript_from_string) {
         m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
     } else {
@@ -124,6 +111,13 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
                                                                                    model_buffer);
     }
 
+    std::shared_ptr<ov::AlignedBuffer> origin_weights_buf;
+    if (!m_origin_weights_path.empty()) {
+        auto mmap = ov::load_mmap_object(m_origin_weights_path);
+        origin_weights_buf =
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
+    }
+
     // XML content
     auto xml_buff = std::make_shared<std::string>();
     if (m_cache_decrypt) {
@@ -140,7 +134,7 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
     std::shared_ptr<ov::AlignedBuffer> model_buf =
         std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>((*xml_buff).data(), hdr.model_size, xml_buff);
 
-    model = m_model_builder(model_buf, weights_buf);
+    model = m_model_builder(model_buf, weights_buf, origin_weights_buf);
 
     // Set Info
     pugi::xml_node root = xml_in_out_doc.child("cnndata");
@@ -186,6 +180,13 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
         model_stream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
     }
 
+    std::shared_ptr<ov::AlignedBuffer> origin_weights_buf;
+    if (!m_origin_weights_path.empty()) {
+        auto mmap = ov::load_mmap_object(m_origin_weights_path);
+        origin_weights_buf =
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
+    }
+
     // read XML content
     auto xml_string = std::make_shared<std::string>();
     model_stream.seekg(hdr.model_offset + hdr_pos);
@@ -210,10 +211,89 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
         hdr.consts_size,
         data_blob);
 
-    model = m_model_builder(model_buf, weights_buf);
+    model = m_model_builder(model_buf, weights_buf, origin_weights_buf);
 
     // Set Info
     pugi::xml_node root = xmlInOutDoc.child("cnndata");
     set_info(root, model);
 };
+
+// Returns the WeightlessCacheAttribute rebuilt from the matching <rt_info> entry of `node` (its "size", "offset" and
+// "type" attributes), or an empty ov::Any when there is no such entry.
+ov::Any XmlDeserializer::parse_weightless_cache_attribute(const pugi::xml_node& node) const {
+    const char* const wlc_name = ov::WeightlessCacheAttribute::get_type_info_static().name;
+    for (const auto& entry : node.child("rt_info").children()) {
+        for (const auto& attr : entry.attributes()) {
+            if (strcmp(attr.name(), "name") == 0 && strcmp(attr.value(), wlc_name) == 0) {
+                const auto origin_size = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(entry, "size"));
+                const auto offset = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(entry, "offset"));
+                const ov::element::Type original_dt(entry.attribute("type").value());
+                return {ov::WeightlessCacheAttribute{origin_size, offset, original_dt}};
+            }
+        }
+    }
+    return {};
+}
+
+// Sets the Constant's data buffer. Nodes carrying a WeightlessCacheAttribute are served from the original weights
+// (converted when the stored precision differs); everything else is delegated to the base implementation.
+void XmlDeserializer::set_constant_num_buffer(ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>& adapter) {
+    OPENVINO_ASSERT(get_weights() != nullptr || m_origin_weights != nullptr,
+                    "Empty weights data in bin file or bin file cannot be found!");
+    const auto& node = get_node();
+    const auto dn = node.child("data");
+    const element::Type target_dtype{ov::util::pugixml::get_str_attr(dn, "element_type")};
+
+    // wlc -> weightless cache
+    ov::Any wlc;
+    if (target_dtype != element::string && m_origin_weights != nullptr) {
+        wlc = parse_weightless_cache_attribute(node);
+    }
+    if (wlc.empty() || !wlc.is<ov::WeightlessCacheAttribute>()) {
+        ov::util::XmlDeserializer::set_constant_num_buffer(adapter);
+        return;
+    }
+
+    // Weightless path: the attribute gives the constant's offset and size within the original weights.
+    const auto& wlc_attribute = wlc.as<ov::WeightlessCacheAttribute>();
+    const auto actual_size = wlc_attribute.original_size;
+    const auto offset = wlc_attribute.bin_offset;
+    const auto w_size = m_origin_weights->size();
+    // Compare against the remaining space: `offset + actual_size` could wrap around and pass a naive check.
+    OPENVINO_ASSERT(offset <= w_size && actual_size <= w_size - offset, "Incorrect weights in bin file!");
+
+    const auto original_dtype = wlc_attribute.original_dtype;
+    char* data = m_origin_weights->get_ptr<char>() + offset;
+
+    ov::Shape shape;
+    OPENVINO_ASSERT(getParameters<size_t>(dn, "shape", shape),
+                    "[ CPU ] Could not get attribute 'shape' during weights deserialization.");
+    const auto elements = ov::shape_size(shape);
+
+    // Both branches below read `elements` items of the original type starting at `data`, so the region recorded in
+    // the attribute has to be large enough for them.
+    const auto required = (elements * original_dtype.bitwidth() + 7) >> 3;
+    if (actual_size < required) {
+        const auto type = ov::util::pugixml::get_str_attr(node, "type");
+        OPENVINO_THROW("Attribute and shape size are inconsistent for ", type, " op! ", actual_size, " < ", required);
+    }
+
+    if (original_dtype != target_dtype) {
+        // Stored precision differs from the target one: convert into a freshly allocated buffer.
+        const auto org_tensor = ov::Tensor(original_dtype, shape, data);
+        auto converted_weights =
+            std::make_shared<ov::AlignedBuffer>(ov::util::get_memory_size(target_dtype, elements));
+        auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}};
+        auto convert = op::v0::Convert();
+        OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported");
+        adapter.set(converted_weights);
+    } else {
+        // Same precision: wrap the region of the original weights without copying.
+        auto buffer = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(data,
+                                                                                             actual_size,
+                                                                                             m_origin_weights);
+        adapter.set(buffer);
+    }
+}
+
 }  // namespace ov::intel_cpu
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
new file mode 100644
index 00000000000000..93476c24e5b6fc
--- /dev/null
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
@@ -0,0 +1,119 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <istream>
+#include <pugixml.hpp>
+#include <string>
+#include <variant>
+
+#include "../xml_util/include/openvino/xml_util/xml_deserialize_util.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/runtime/aligned_buffer.hpp"
+#include "openvino/util/xml_parse_utils.hpp"
+#include "utils/codec_xor.hpp"
+
+namespace ov::intel_cpu {
+
+// Appends every comma-separated element of `value` to `res`; throws if a field is empty or cannot be parsed.
+template <class T>
+void str_to_container(const std::string& value, T& res) {
+    std::stringstream ss(value);
+    std::string field;
+    while (getline(ss, field, ',')) {
+        std::stringstream fs(field);
+        typename T::value_type val{};
+        OPENVINO_ASSERT(!field.empty() && (fs >> val), "Cannot get vector of parameters! \"", value, "\" is incorrect");
+        res.insert(res.end(), val);
+    }
+}
+
+template <class T>
+bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector<T>& value) {
+    str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value);
+    return true;
+}
+
+class XmlDeserializer : public ov::util::XmlDeserializer {
+public:
+    explicit XmlDeserializer(const pugi::xml_node& node,
+                             const std::shared_ptr<ov::AlignedBuffer>& weights,
+                             const std::shared_ptr<ov::AlignedBuffer>& origin_weights,
+                             const std::unordered_map<std::string, ov::OpSet>& opsets,
+                             const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions,
+                             std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>>& variables,
+                             size_t version)
+        : ov::util::XmlDeserializer(node, weights, opsets, extensions, variables, version),
+          m_origin_weights{origin_weights} {}
+
+    explicit XmlDeserializer(const pugi::xml_node& node,
+                             const std::shared_ptr<ov::AlignedBuffer>& weights,
+                             const std::unordered_map<std::string, ov::OpSet>& opsets,
+                             const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions,
+                             std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>>& variables,
+                             size_t version)
+        : XmlDeserializer(node, weights, nullptr, opsets, extensions, variables, version) {}
+
+protected:
+    ov::Any parse_weightless_cache_attribute(const pugi::xml_node& node) const override;
+
+    void set_constant_num_buffer(ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>& adapter) override;
+
+private:
+    std::unique_ptr<ov::util::XmlDeserializer> make_visitor(
+        const pugi::xml_node& node,
+        const std::shared_ptr<ov::AlignedBuffer>& weights,
+        const std::unordered_map<std::string, ov::OpSet>& opsets,
+        const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions,
+        std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>>& variables,
+        size_t version) const override {
+        return std::make_unique<XmlDeserializer>(node,
+                                                 weights,
+                                                 m_origin_weights,
+                                                 opsets,
+                                                 extensions,
+                                                 variables,
+                                                 version);
+    }
+
+    std::shared_ptr<ov::AlignedBuffer> m_origin_weights;
+};
+
+class ModelDeserializer {
+public:
+    using ModelBuilder = std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
+                                                                  const std::shared_ptr<ov::AlignedBuffer>&,
+                                                                  const std::shared_ptr<ov::AlignedBuffer>&)>;
+
+    ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_buffer,
+                      ModelBuilder fn,
+                      const CacheDecrypt& decrypt_fn,
+                      bool decript_from_string,
+                      std::string origin_weights_path = "");
+
+    ModelDeserializer(std::istream& model_stream,
+                      ModelBuilder fn,
+                      const CacheDecrypt& decrypt_fn,
+                      bool decript_from_string,
+                      std::string origin_weights_path = "");
+
+    virtual ~ModelDeserializer() = default;
+
+    void operator>>(std::shared_ptr<ov::Model>& model);
+
+protected:
+    static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);
+
+    void process_model(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& model_buffer);
+    void process_model(std::shared_ptr<ov::Model>& model, std::reference_wrapper<std::istream> model_stream);
+
+    std::variant<std::shared_ptr<ov::AlignedBuffer>, std::reference_wrapper<std::istream>> m_model;
+    ModelBuilder m_model_builder;
+    CacheDecrypt m_cache_decrypt;
+    bool m_decript_from_string;
+    std::string m_origin_weights_path;
+};
+
+}  //  namespace ov::intel_cpu
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp
new file mode 100644
index 00000000000000..114991ec59ea78
--- /dev/null
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp
@@ -0,0 +1,181 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "serializer.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include "openvino/core/model.hpp"
+#include "openvino/core/node.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
+#include "openvino/core/runtime_attribute.hpp"
+#include "openvino/core/type.hpp"
+#include "openvino/core/type/element_type.hpp"
+#include "openvino/pass/serialize.hpp"
+#include "openvino/xml_util/constant_writer.hpp"
+#include "openvino/xml_util/xml_serialize_util.hpp"
+
+namespace ov::intel_cpu {
+
+// ConstantWriter that, while skipping is enabled, writes nothing and only advances a virtual offset by each
+// constant's size; otherwise it forwards to util::ConstantWriter::write.
+class WeightlessWriter : public util::ConstantWriter {
+public:
+    explicit WeightlessWriter(util::ConstantWriter& other) : util::ConstantWriter(other), m_offset{} {}
+
+    WeightlessWriter(std::ostream& bin_file) : util::ConstantWriter(bin_file), m_offset{} {}
+
+    WeightlessWriter::FilePosition write(const char* ptr,
+                                         size_t size,
+                                         size_t& new_size,
+                                         bool compress_to_fp16,
+                                         ov::element::Type src_type,
+                                         bool ptr_is_temporary) override {
+        if (!m_skip_weights) {
+            return util::ConstantWriter::write(ptr, size, new_size, compress_to_fp16, src_type, ptr_is_temporary);
+        }
+        // Nothing is written: report a zero-sized blob located at the current virtual offset.
+        new_size = 0LU;
+        const WeightlessWriter::FilePosition position = m_offset;
+        m_offset += size;
+        return position;
+    }
+
+    // Enables or disables skipping for subsequent write() calls.
+    void skip_weights(bool skip_weights) {
+        m_skip_weights = skip_weights;
+    }
+
+private:
+    WeightlessWriter::FilePosition m_offset;
+    bool m_skip_weights = false;
+};
+
+// util::XmlSerializer specialization that owns a WeightlessWriter and serializes WeightlessCacheAttribute entries
+// as "name"/"version"/"type"/"offset"/"size" XML attributes.
+class XmlSerializer : public util::XmlSerializer {
+public:
+    XmlSerializer(pugi::xml_node& data,
+                  const std::string& node_type_name,
+                  util::ConstantWriter& constant_write_handler,
+                  int64_t version,
+                  bool deterministic = false,
+                  bool compress_to_fp16 = false,
+                  ov::element::Type output_element_type = ov::element::dynamic,
+                  bool data_is_temporary = false,
+                  bool wl_mode = false)
+        : util::XmlSerializer(data,
+                              node_type_name,
+                              constant_write_handler,
+                              version,
+                              deterministic,
+                              compress_to_fp16,
+                              output_element_type,
+                              data_is_temporary),
+          m_weightless_const_writer(constant_write_handler),
+          m_weightless_mode(wl_mode) {}
+
+private:
+    // Writes a WeightlessCacheAttribute by hand and switches weight skipping on; any other attribute is
+    // forwarded to the base class.
+    bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute) override {
+        const auto* wl_attr = ov::as_type<const ov::WeightlessCacheAttribute>(&attribute);
+        if (wl_attr == nullptr) {
+            return util::XmlSerializer::append_rt_attribute(node, attribute);
+        }
+        m_weightless_const_writer.skip_weights(true);
+
+        const auto& type_info = attribute.get_type_info();
+        node.append_attribute("name").set_value(type_info.name);
+        node.append_attribute("version").set_value(type_info.get_version().data());
+        node.append_attribute("type").set_value(util::get_ir_precision_name(wl_attr->original_dtype).data());
+        node.append_attribute("offset").set_value(wl_attr->bin_offset);
+        node.append_attribute("size").set_value(wl_attr->original_size);
+        return true;
+    }
+
+    // Weight skipping is enabled only in weightless mode and only for nodes carrying a WeightlessCacheAttribute.
+    bool append_node_attributes(ov::Node& node) override {
+        m_weightless_const_writer.skip_weights(
+            m_weightless_mode && node.get_rt_info().count(ov::WeightlessCacheAttribute::get_type_info_static()) != 0);
+        return util::XmlSerializer::append_node_attributes(node);
+    }
+
+    // Returns the WeightlessWriter owned by this serializer.
+    ov::util::ConstantWriter& get_constant_write_handler() override {
+        return m_weightless_const_writer;
+    }
+
+    // Creates another serializer of this type, passing the weightless mode flag along.
+    std::unique_ptr<util::XmlSerializer> make_visitor(pugi::xml_node& data,
+                                                      const std::string& node_type_name,
+                                                      util::ConstantWriter& constant_write_handler,
+                                                      int64_t version,
+                                                      bool deterministic,
+                                                      bool compress_to_fp16,
+                                                      ov::element::Type output_element_type,
+                                                      bool data_is_temporary) const override {
+        return std::make_unique<XmlSerializer>(data,
+                                               node_type_name,
+                                               constant_write_handler,
+                                               version,
+                                               deterministic,
+                                               compress_to_fp16,
+                                               output_element_type,
+                                               data_is_temporary,
+                                               m_weightless_mode);
+    }
+
+    WeightlessWriter m_weightless_const_writer;
+    bool m_weightless_mode = false;
+};
+
+////////// ModelSerializer //////////
+
+ModelSerializer::ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn, bool weightless_mode)
+    : ov::pass::StreamSerialize(
+          ostream,
+          [](std::ostream& stream) {
+              pugi::xml_document xml_doc;
+              pugi::xml_node root = xml_doc.append_child("cnndata");
+              root.append_child("outputs");
+              xml_doc.save(stream);
+          },
+          encrypt_fn),
+      m_weightless_mode(weightless_mode) {};
+
+void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {
+    run_on_model(std::const_pointer_cast<ov::Model>(model->clone()));
+}
+
+bool ModelSerializer::use_absolute_offset() {
+    return false;
+}
+
+std::unique_ptr<util::XmlSerializer> ModelSerializer::make_serializer(pugi::xml_node& data,
+                                                                      const std::string& node_type_name,
+                                                                      util::ConstantWriter& constant_write_handler,
+                                                                      int64_t version,
+                                                                      bool deterministic,
+                                                                      bool compress_to_fp16,
+                                                                      ov::element::Type output_element_type,
+                                                                      bool data_is_temporary) const {
+    return std::make_unique<XmlSerializer>(data,
+                                           node_type_name,
+                                           constant_write_handler,
+                                           version,
+                                           deterministic,
+                                           compress_to_fp16,
+                                           output_element_type,
+                                           data_is_temporary,
+                                           m_weightless_mode);
+}
+
+}  // namespace ov::intel_cpu
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp
new file mode 100644
index 00000000000000..bd10f5c20d0ffd
--- /dev/null
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ostream>
+#include <pugixml.hpp>
+#include <string>
+
+#include "openvino/core/model.hpp"
+#include "openvino/pass/serialize.hpp"
+
+namespace ov::intel_cpu {
+
+class ModelSerializer : private ov::pass::StreamSerialize {
+public:
+    using CacheEncrypt = std::function<std::string(const std::string&)>;
+
+    explicit ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn = {}, bool weightless_mode = false);
+
+    void operator<<(const std::shared_ptr<ov::Model>& model);
+
+private:
+    bool use_absolute_offset() override;
+
+    std::unique_ptr<util::XmlSerializer> make_serializer(pugi::xml_node& data,
+                                                         const std::string& node_type_name,
+                                                         util::ConstantWriter& constant_write_handler,
+                                                         int64_t version,
+                                                         bool deterministic,
+                                                         bool compress_to_fp16,
+                                                         ov::element::Type output_element_type,
+                                                         bool data_is_temporary) const override;
+
+    bool m_weightless_mode;
+};
+
+}  // namespace ov::intel_cpu
diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp
deleted file mode 100644
index cd6789a5415ed2..00000000000000
--- a/src/plugins/intel_cpu/src/utils/serialize.hpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (C) 2018-2025 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-#pragma once
-
-#include <functional>
-#include <istream>
-#include <memory>
-#include <ostream>
-#include <pugixml.hpp>
-#include <string>
-#include <variant>
-
-#include "openvino/core/model.hpp"
-#include "openvino/pass/serialize.hpp"
-#include "openvino/runtime/aligned_buffer.hpp"
-#include "utils/codec_xor.hpp"
-
-namespace ov::intel_cpu {
-
-class ModelSerializer : private ov::pass::StreamSerialize {
-public:
-    using CacheEncrypt = std::function<std::string(const std::string&)>;
-
-    explicit ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn = {});
-
-    void operator<<(const std::shared_ptr<ov::Model>& model);
-
-private:
-    bool use_absolute_offset() override;
-};
-
-class ModelDeserializer {
-public:
-    using ModelBuilder = std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
-                                                                  const std::shared_ptr<ov::AlignedBuffer>&)>;
-
-    ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_buffer,
-                      ModelBuilder fn,
-                      const CacheDecrypt& decrypt_fn,
-                      bool decript_from_string);
-
-    ModelDeserializer(std::istream& model_stream,
-                      ModelBuilder fn,
-                      const CacheDecrypt& decrypt_fn,
-                      bool decript_from_string);
-
-    virtual ~ModelDeserializer() = default;
-
-    void operator>>(std::shared_ptr<ov::Model>& model);
-
-protected:
-    static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);
-
-    void process_model(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& model_buffer);
-    void process_model(std::shared_ptr<ov::Model>& model, std::reference_wrapper<std::istream> model_stream);
-
-    std::variant<std::shared_ptr<ov::AlignedBuffer>, std::reference_wrapper<std::istream>> m_model;
-    ModelBuilder m_model_builder;
-    CacheDecrypt m_cache_decrypt;
-    bool m_decript_from_string;
-};
-
-}  // namespace ov::intel_cpu
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
index 30d1a364cf33f0..b78ef0204bb19a 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp
@@ -53,7 +53,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
         RO_property(ov::key_cache_precision.name()),
         RO_property(ov::value_cache_precision.name()),
         RO_property(ov::key_cache_group_size.name()),
-        RO_property(ov::value_cache_group_size.name()),
+        RO_property(ov::value_cache_group_size.name())
     };
 
     ov::Core ie;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
index b909a77c2192ae..bb786f3828becc 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -26,6 +26,9 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
     auto RW_property = [](const std::string& propertyName) {
         return ov::PropertyName(propertyName, ov::PropertyMutability::RW);
     };
+    auto WO_property = [](const std::string& propertyName) {
+        return ov::PropertyName(propertyName, ov::PropertyMutability::WO);
+    };
 
     std::vector<ov::PropertyName> expectedSupportedProperties{
         // read only
@@ -38,6 +41,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
         RO_property(ov::device::capabilities.name()),
         RO_property(ov::device::type.name()),
         RO_property(ov::device::architecture.name()),
+        // write only
+        WO_property(ov::weights_path.name()),
         // read write
         RW_property(ov::num_streams.name()),
         RW_property(ov::inference_num_threads.name()),
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp
new file mode 100644
index 00000000000000..7ce2c2848519d5
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "behavior/compiled_model/model_cache.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace ov::test::behavior;
+
+INSTANTIATE_TEST_SUITE_P(smoke_,
+                         WeightlessCacheAccuracy,
+                         ::testing::Combine(::testing::Bool(),
+                                            ::testing::Bool(),
+                                            ::testing::ValuesIn(inference_modes),
+                                            ::testing::ValuesIn(model_dtypes),
+                                            ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                         WeightlessCacheAccuracy::get_test_case_name);
+
+INSTANTIATE_TEST_SUITE_P(smoke_,
+                         WeightlessCacheAccuracyLowPrecision,
+                         ::testing::Combine(::testing::Bool(),
+                                            ::testing::Bool(),
+                                            ::testing::ValuesIn(inference_modes),
+                                            ::testing::ValuesIn(low_precision_dtypes),
+                                            ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                         WeightlessCacheAccuracy::get_test_case_name);
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 48d3c5dd16b612..fee18f63d451f3 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -296,6 +296,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*FC_3D_BF16.*MatMulLayerCPUTest.*)",
         // Issue: 163242
         R"(.*bf16.*RNNSequenceCPUTest.*)",
+        R"(.*WeightlessCacheAccuracy.TiWithLstmCell.*model_dtype=bf16.*)",
         // Issue: 163250
         R"(.*OnnxModelWithExtensionFromDSO.*)",
         // Issue: 163273
@@ -368,6 +369,7 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(.*smoke_ConcatSDPTransposeByChannelTest.*)");
     // Issue: 168490
     retVector.emplace_back(R"(.*CPU/CoreThreadingTest.smoke_QueryModel.*)");
+    retVector.emplace_back(R"(.*WeightlessCacheAccuracy.*)");
 #endif
 
 #if defined(OPENVINO_ARCH_ARM)
diff --git a/src/plugins/intel_cpu/tests/unit/CMakeLists.txt b/src/plugins/intel_cpu/tests/unit/CMakeLists.txt
index 921125d9916d0f..623a3aee798467 100644
--- a/src/plugins/intel_cpu/tests/unit/CMakeLists.txt
+++ b/src/plugins/intel_cpu/tests/unit/CMakeLists.txt
@@ -74,6 +74,7 @@ ov_add_test_target(
             openvino::shape_inference
             openvino_runtime_s
             unit_test_utils
+            openvino_xml_util
             ov_snippets_models
             snippets_test_utils
             ${MLAS_LIBRARY}
diff --git a/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt b/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt
index 88689b18b97e58..428a257eb5694a 100644
--- a/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt
+++ b/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt
@@ -48,6 +48,7 @@ ov_add_test_target(
             dnnl
             gmock
             openvino_runtime_s
+            openvino_xml_util
             unit_test_utils
             ov_snippets_models
             snippets_test_utils
diff --git a/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp b/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp
new file mode 100644
index 00000000000000..750393b8ef8cab
--- /dev/null
+++ b/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp
@@ -0,0 +1,70 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "openvino/core/model.hpp"
+#include "openvino/core/type/element_type.hpp"
+
+namespace ov {
+namespace test {
+namespace behavior {
+
+// Tuple order: use compile_model API for the second load, enable cache encryption, inference precision hint,
+// model data type, target device name.
+using WeightlessCacheAccuracyTestParams = std::tuple<bool, bool, ov::element::Type, ov::element::Type, const char*>;
+
+// Parameterized fixture; run() compiles m_model with a weightless cache and compares it with the re-loaded model.
+class WeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<WeightlessCacheAccuracyTestParams> {
+public:
+    // Produces the gtest case name from the parameter tuple.
+    static std::string get_test_case_name(const ::testing::TestParamInfo<WeightlessCacheAccuracyTestParams>& obj);
+
+protected:
+    std::shared_ptr<ov::Model> m_model;  // set by each TEST_P body before calling run()
+    std::string m_xml_path;
+    std::string m_bin_path;
+    std::string m_cache_path;
+    std::string m_cache_dir;
+    // Value-initialized so the fixture never holds indeterminate values before SetUp() assigns them.
+    const char* m_target_device = nullptr;
+    bool m_use_compile_model_api = false;
+    bool m_do_encryption = false;
+    ov::element::Type m_inference_mode;
+    ov::element::Type m_model_dtype;
+
+    void SetUp() override;
+    void TearDown() override;
+    void run();
+};
+
+// Same fixture, instantiated separately with low-precision weight types.
+class WeightlessCacheAccuracyLowPrecision : public WeightlessCacheAccuracy {};
+
+static const std::vector<ov::element::Type> inference_modes = {
+    ov::element::f32,
+    ov::element::f16,
+};
+
+static const std::vector<ov::element::Type> model_dtypes = {
+    ov::element::f32,
+    ov::element::f16,
+    ov::element::bf16,
+};
+
+static const std::vector<ov::element::Type> low_precision_dtypes = {
+    ov::element::u8,
+    ov::element::u4,
+    ov::element::i4,
+};
+
+}  // namespace behavior
+}  // namespace test
+}  // namespace ov
diff --git a/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp b/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp
new file mode 100644
index 00000000000000..e8c8ba06de430c
--- /dev/null
+++ b/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp
@@ -0,0 +1,190 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "behavior/compiled_model/model_cache.hpp"
+
+#include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp"
+#include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp"
+#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
+#include "common_test_utils/test_assertions.hpp"
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/util/codec_xor.hpp"
+#include "shared_test_classes/subgraph/weights_decompression_builders.hpp"
+
+namespace ov {
+namespace test {
+namespace behavior {
+
+std::string WeightlessCacheAccuracy::get_test_case_name(const ::testing::TestParamInfo<WeightlessCacheAccuracyTestParams>& obj) {
+    // Builds the gtest case name by joining every tuple element as "<label>=<value>" with '_'.
+    const auto& [use_compile_model_api, do_encryption, inference_mode, model_dtype, device] = obj.param;
+
+    std::ostringstream name;
+    name << "use_compile_model_api=" << utils::bool2str(use_compile_model_api);
+    name << "_do_encryption=" << utils::bool2str(do_encryption);
+    name << "_inference_mode=" << inference_mode << "_model_dtype=" << model_dtype;
+    name << "_device=" << device;
+    return name.str();
+}
+
+void WeightlessCacheAccuracy::SetUp() {
+    // Unpack the test parameters, then derive all per-test file names from one generated prefix.
+    std::tie(m_use_compile_model_api, m_do_encryption, m_inference_mode, m_model_dtype, m_target_device) = GetParam();
+    const std::string prefix = ov::test::utils::generateTestFilePrefix();
+    m_xml_path = prefix + ".xml";
+    m_bin_path = prefix + ".bin";
+    m_cache_path = prefix + ".blob";
+    m_cache_dir = prefix + "_cache_dir";
+}
+
+void WeightlessCacheAccuracy::TearDown() {
+    // Delete the serialized IR and the exported blob, then empty and remove the cache directory.
+    for (const auto* path : {&m_xml_path, &m_bin_path, &m_cache_path}) {
+        std::remove(path->c_str());
+    }
+    ov::test::utils::removeFilesWithExt(m_cache_dir, "blob");
+    ov::test::utils::removeFilesWithExt(m_cache_dir, "cl_cache");
+    ov::test::utils::removeDir(m_cache_dir);
+}
+
+void WeightlessCacheAccuracy::run() {
+    // Compiles m_model with an OPTIMIZE_SIZE cache, obtains a second model from that cache (compile_model again or
+    // export/import), asserts the cache file was not rewritten, then compares the inference outputs of both models.
+    ov::AnyMap config = {ov::cache_dir(m_cache_dir),
+                         ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+                         ov::hint::inference_precision(m_inference_mode)};
+    ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+                                           ov::weights_path(m_bin_path),
+                                           ov::hint::inference_precision(m_inference_mode)};
+
+    if (m_do_encryption) {
+        ov::EncryptionCallbacks encryption_callbacks;
+        encryption_callbacks.encrypt = ov::util::codec_xor;
+        encryption_callbacks.decrypt = ov::util::codec_xor;
+        config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+        config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+    }
+    auto core = ov::test::utils::PluginCache::get().core();
+    ov::pass::Serialize(m_xml_path, m_bin_path).run_on_model(m_model);
+
+    auto compiled_model = core->compile_model(m_xml_path, m_target_device, config);
+
+    if (!m_use_compile_model_api) {
+        auto ofstr = std::ofstream(m_cache_path, std::ofstream::binary);
+        compiled_model.export_model(ofstr);
+        ofstr.close();
+    }
+    // Path of the blob under test: the single blob in the cache dir, or the explicitly exported file.
+    auto get_cache_path = [&]() -> std::string {
+        if (!m_use_compile_model_api) {
+            return m_cache_path;
+        }
+        auto blobs = ov::test::utils::listFilesWithExt(m_cache_dir, "blob");
+        EXPECT_EQ(blobs.size(), 1U);
+        // EXPECT_EQ is non-fatal, so never index an empty list; an empty path fails the mtime assertions below.
+        return blobs.empty() ? std::string{} : blobs[0];
+    };
+    // Last modification time of `path`, or 0 when stat() fails.
+    auto get_mod_time = [&](const std::string& path) {
+        struct stat result;
+        if (stat(path.c_str(), &result) == 0) {
+            return result.st_mtime;
+        }
+        return static_cast<time_t>(0);
+    };
+
+    auto first_cache_path = get_cache_path();
+    auto first_mod_time = get_mod_time(first_cache_path);
+    ASSERT_NE(first_mod_time, static_cast<time_t>(0));
+
+    ov::CompiledModel imported_model;
+    if (m_use_compile_model_api) {
+        imported_model = core->compile_model(m_xml_path, m_target_device, config);
+    } else {
+        auto ifstr = std::ifstream(m_cache_path, std::ifstream::binary);
+        imported_model = core->import_model(ifstr, m_target_device, config_with_weights_path);
+        ifstr.close();
+    }
+
+    auto second_cache_path = get_cache_path();
+    auto second_mod_time = get_mod_time(second_cache_path);
+
+    // Something went wrong if a new cache is created during the second run.
+    ASSERT_EQ(first_mod_time, second_mod_time);
+
+    auto orig_req = compiled_model.create_infer_request();
+    auto new_req = imported_model.create_infer_request();
+
+    for (size_t param_idx = 0; param_idx < m_model->get_parameters().size(); ++param_idx) {
+        auto input = m_model->get_parameters().at(param_idx);
+        auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(),
+                                                                                input->get_shape(),
+                                                                                -100,
+                                                                                100,
+                                                                                param_idx);
+        orig_req.set_tensor(input, tensor);
+        new_req.set_tensor(input, tensor);
+    }
+
+    orig_req.infer();
+    new_req.infer();
+
+    auto result_vector = m_model->get_results();
+    for (auto& res : result_vector) {
+        auto orig_out = orig_req.get_tensor(res);
+        auto new_out = new_req.get_tensor(res);
+        ov::test::utils::compare(orig_out, new_out, m_inference_mode);
+    }
+}
+// Runs the cache check on the make_read_concat_split_assign model ({1, 1, 2, 4}, parameterized model dtype).
+TEST_P(WeightlessCacheAccuracy, ReadConcatSplitAssign) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, m_model_dtype));
+    OV_ASSERT_NO_THROW(run());
+}
+// Runs the cache check on the make_single_concat_with_constant model ({1, 1, 2, 4}, parameterized model dtype).
+TEST_P(WeightlessCacheAccuracy, SingleConcatWithConstant) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, m_model_dtype));
+    OV_ASSERT_NO_THROW(run());
+}
+// Runs the cache check on the make_ti_with_lstm_cell model built with the parameterized model dtype.
+TEST_P(WeightlessCacheAccuracy, TiWithLstmCell) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_ti_with_lstm_cell(m_model_dtype));
+    OV_ASSERT_NO_THROW(run());
+}
+// Builds MatMul(f32 Parameter, weights subgraph from initMatMulDecompressionSubgraph using m_model_dtype) and runs it.
+TEST_P(WeightlessCacheAccuracyLowPrecision, MatmulWeightsDecompression) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    ov::test::MatMulDecompressionShapeParams shape_params{{{}, {{1, 4, 16}}}, {1, 16, 32}};
+    auto dynShape = shape_params.data_shape.first;
+    if (dynShape.rank() == 0) {
+        dynShape = shape_params.data_shape.second.front();
+    }
+    ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(ov::element::f32, dynShape)};
+    const auto weights_subgraph = ov::test::initMatMulDecompressionSubgraph(shape_params.weights_shape,
+                                                                            shape_params.decompression_group_size,
+                                                                            ov::element::f32,
+                                                                            m_model_dtype,
+                                                                            ov::element::f32,
+                                                                            ov::element::dynamic,
+                                                                            true,
+                                                                            ov::test::DecompressionType::full,
+                                                                            ov::test::DecompressionType::full,
+                                                                            false);
+    auto matmul = std::make_shared<ov::op::v0::MatMul>(params[0], weights_subgraph);
+    // Expose every MatMul output as a model Result.
+    ov::ResultVector results;
+    for (const auto& output : matmul->outputs()) {
+        results.push_back(std::make_shared<ov::op::v0::Result>(output));
+    }
+    m_model = std::make_shared<ov::Model>(results, params, "MatmulWeightsDecompression");
+    OV_ASSERT_NO_THROW(run());
+}
+
+}  // namespace behavior
+}  // namespace test
+}  // namespace ov

From 653b16cb8c770b99677227a8d7ce6591458462e6 Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev <nikolay.shchegolev@intel.com>
Date: Mon, 29 Sep 2025 14:36:13 +0400
Subject: [PATCH 2/3] Fixes as per comments

---
 .../openvino/xml_util/xml_deserialize_util.hpp | 14 ++++++++++++++
 src/core/xml_util/src/xml_deserialize_util.cpp | 16 +---------------
 src/plugins/intel_cpu/src/plugin.cpp           | 18 ++++++++----------
 .../utils/graph_serializer/deserializer.hpp    | 15 +--------------
 4 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
index 653810b0ca3eaf..0f4baafa15d86a 100644
--- a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
+++ b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp
@@ -21,6 +21,20 @@
 namespace ov::util {
 struct GenericLayerParams;
 
+template <class T>
+void str_to_container(const std::string& value, T& res) {
+    // Splits `value` on ',' and appends each field, stream-parsed as T::value_type, to the end of `res`.
+    std::stringstream fields(value);
+    for (std::string field; std::getline(fields, field, ',');) {
+        if (field.empty())
+            OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
+        std::stringstream parser(field);
+        typename T::value_type val;
+        parser >> val;
+        res.insert(res.end(), val);
+    }
+}
+
 class XmlDeserializer : public ov::AttributeVisitor {
 public:
     explicit XmlDeserializer(const pugi::xml_node& node,
diff --git a/src/core/xml_util/src/xml_deserialize_util.cpp b/src/core/xml_util/src/xml_deserialize_util.cpp
index 540bde2374bfdf..ad6a2e0b063f4b 100644
--- a/src/core/xml_util/src/xml_deserialize_util.cpp
+++ b/src/core/xml_util/src/xml_deserialize_util.cpp
@@ -42,26 +42,12 @@ bool getStrAttribute(const pugi::xml_node& node, const std::string& name, std::s
     return true;
 }
 
-template <class T>
-void str_to_container(const std::string& value, T& res) {
-    std::stringstream ss(value);
-    std::string field;
-    while (getline(ss, field, ',')) {
-        if (field.empty())
-            OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
-        std::stringstream fs(field);
-        typename T::value_type val;
-        fs >> val;
-        res.insert(res.end(), val);
-    }
-}
-
 template <class T>
 bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector<T>& value) {
     std::string param;
     if (!getStrAttribute(node, name, param))
         return false;
-    str_to_container(param, value);
+    ov::util::str_to_container(param, value);
     return true;
 }
 
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 226c198851e29a..39f15f6bfec6d5 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -701,7 +701,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
     return res;
 }
 
-std::string get_origin_weights_path(const ov::AnyMap& config) {
+static std::string get_origin_weights_path(const ov::AnyMap& config) {
     ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
     std::string origin_weights_path;
 
@@ -719,7 +719,7 @@ std::string get_origin_weights_path(const ov::AnyMap& config) {
     return origin_weights_path;
 }
 
-bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) {
+static bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) {
     bool decrypt_from_string = false;
 
     if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) {
@@ -740,10 +740,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
 
     ModelDeserializer deserializer(
         model_stream,
-        [this](
-            const std::shared_ptr<ov::AlignedBuffer>& model,
-            const std::shared_ptr<ov::AlignedBuffer>& weights,
-            const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
+        [this](const std::shared_ptr<ov::AlignedBuffer>& model,
+               const std::shared_ptr<ov::AlignedBuffer>& weights,
+               const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
             if (origin_weights == nullptr) {
                 return get_core()->read_model(model, weights);
             } else {
@@ -808,10 +807,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(const ov::Tensor& model
 
     ModelDeserializer deserializer(
         model_buffer,
-        [this](
-            const std::shared_ptr<ov::AlignedBuffer>& model,
-            const std::shared_ptr<ov::AlignedBuffer>& weights,
-            const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
+        [this](const std::shared_ptr<ov::AlignedBuffer>& model,
+               const std::shared_ptr<ov::AlignedBuffer>& weights,
+               const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
             if (origin_weights == nullptr) {
                 return get_core()->read_model(model, weights);
             } else {
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
index 93476c24e5b6fc..9bc9c7a445c366 100644
--- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
@@ -17,22 +17,9 @@
 
 namespace ov::intel_cpu {
 
-template <class T>
-void str_to_container(const std::string& value, T& res) {
-    std::stringstream ss(value);
-    std::string field;
-    while (getline(ss, field, ',')) {
-        OPENVINO_ASSERT(!field.empty(), "Cannot get vector of parameters! \"", value, "\" is incorrect");
-        std::stringstream fs(field);
-        typename T::value_type val;
-        fs >> val;
-        res.insert(res.end(), val);
-    }
-}
-
 template <class T>
 bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector<T>& value) {
-    str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value);
+    ov::util::str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value);
     return true;
 }
 

From 46025379ffa2ae4291b58d81940d1acd900c93b3 Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev <nikolay.shchegolev@intel.com>
Date: Thu, 2 Oct 2025 11:31:49 +0400
Subject: [PATCH 3/3] Fixes as per comments 2

---
 src/plugins/intel_cpu/src/plugin.cpp          | 110 +----------
 .../utils/graph_serializer/deserializer.cpp   | 186 +++++++++++-------
 .../utils/graph_serializer/deserializer.hpp   |  24 ++-
 3 files changed, 138 insertions(+), 182 deletions(-)

diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 39f15f6bfec6d5..e867f84fef18b3 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -720,15 +720,13 @@ static std::string get_origin_weights_path(const ov::AnyMap& config) {
 }
 
 static bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) {
-    bool decrypt_from_string = false;
-
     if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) {
         const auto& encryption_callbacks = it->second.as<EncryptionCallbacks>();
         decrypt.m_decrypt_str = encryption_callbacks.decrypt;
-        decrypt_from_string = true;
+        return true;  // callbacks were supplied and the decrypt callback was stored in `decrypt`
+    } else {
+        return false;  // no encryption callbacks in the config; `decrypt` is left untouched
     }
-
-    return decrypt_from_string;
 }
 
 std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
@@ -738,56 +736,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
     auto decrypt_from_string = get_cache_decrypt_fn(config, decrypt);
     const auto origin_weights_path = get_origin_weights_path(config);
 
-    ModelDeserializer deserializer(
-        model_stream,
-        [this](const std::shared_ptr<ov::AlignedBuffer>& model,
-               const std::shared_ptr<ov::AlignedBuffer>& weights,
-               const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
-            if (origin_weights == nullptr) {
-                return get_core()->read_model(model, weights);
-            } else {
-                // Custom deserialization for weightless mode
-
-                pugi::xml_document xml_doc;
-                const auto root = [&] {
-                    auto res =
-                        xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
-                    OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
-                    return xml_doc.document_element();
-                }();
-                const auto opsets = [] {
-                    std::unordered_map<std::string, ov::OpSet> opsets;
-                    for (const auto& [name, mk_opset] : ov::get_available_opsets()) {
-                        opsets[name] = mk_opset();
-                    }
-                    return opsets;
-                }();
-                const auto version = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(root, "version", 0));
-
-                auto create_extensions_map =
-                    [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
-                    std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> exts;
-                    std::vector<ov::Extension::Ptr> m_extensions;
-                    OV_CREATE_EXTENSION(m_extensions);
-                    for (const auto& ext : m_extensions) {
-                        if (auto base_ext = std::dynamic_pointer_cast<ov::BaseOpExtension>(ext))
-                            exts.insert({base_ext->get_type_info(), base_ext});
-                    }
-                    return exts;
-                }();
-
-                std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>> variables;
-                const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights;
-                XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version);
-                std::shared_ptr<ov::Model> model;
-                visitor.on_attribute("net", model);
-                model->get_rt_info()["version"] = int64_t(version);
-                return model;
-            }
-        },
-        decrypt,
-        decrypt_from_string,
-        origin_weights_path);
+    ModelDeserializer deserializer(model_stream, get_core(), decrypt, decrypt_from_string, origin_weights_path);
 
     return deserialize_model(deserializer, config);
 }
@@ -805,56 +754,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(const ov::Tensor& model
                                                        model_tensor.get_byte_size(),
                                                        model_tensor);
 
-    ModelDeserializer deserializer(
-        model_buffer,
-        [this](const std::shared_ptr<ov::AlignedBuffer>& model,
-               const std::shared_ptr<ov::AlignedBuffer>& weights,
-               const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
-            if (origin_weights == nullptr) {
-                return get_core()->read_model(model, weights);
-            } else {
-                // Custom deserialization for weightless mode
-
-                pugi::xml_document xml_doc;
-                const auto root = [&] {
-                    auto res =
-                        xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
-                    OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
-                    return xml_doc.document_element();
-                }();
-                const auto opsets = [] {
-                    std::unordered_map<std::string, ov::OpSet> opsets;
-                    for (const auto& [name, mk_opset] : ov::get_available_opsets()) {
-                        opsets[name] = mk_opset();
-                    }
-                    return opsets;
-                }();
-                const auto version = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(root, "version", 0));
-
-                auto create_extensions_map =
-                    [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
-                    std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> exts;
-                    std::vector<ov::Extension::Ptr> m_extensions;
-                    OV_CREATE_EXTENSION(m_extensions);
-                    for (const auto& ext : m_extensions) {
-                        if (auto base_ext = std::dynamic_pointer_cast<ov::BaseOpExtension>(ext))
-                            exts.insert({base_ext->get_type_info(), base_ext});
-                    }
-                    return exts;
-                }();
-
-                std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>> variables;
-                const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights;
-                XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version);
-                std::shared_ptr<ov::Model> model;
-                visitor.on_attribute("net", model);
-                model->get_rt_info()["version"] = int64_t(version);
-                return model;
-            }
-        },
-        decrypt,
-        decrypt_from_string,
-        origin_weights_path);
+    ModelDeserializer deserializer(model_buffer, get_core(), decrypt, decrypt_from_string, origin_weights_path);
 
     return deserialize_model(deserializer, config);
 }
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
index 8a45c184b129bc..ad12787c19739a 100644
--- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp
@@ -6,23 +6,32 @@
 
 #include <cstddef>
 #include <cstring>
+#include <filesystem>
 #include <functional>
 #include <istream>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <variant>
+#include <vector>
 
 #include "openvino/core/any.hpp"
 #include "openvino/core/except.hpp"
+#include "openvino/core/extension.hpp"
 #include "openvino/core/memory_util.hpp"
 #include "openvino/core/model.hpp"
+#include "openvino/core/op_extension.hpp"
 #include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/core/shape.hpp"
 #include "openvino/core/type/element_type.hpp"
+#include "openvino/core/type.hpp"
 #include "openvino/op/convert.hpp"
+#include "openvino/op/util/variable.hpp"
+#include "openvino/opsets/opset.hpp"
 #include "openvino/pass/serialize.hpp"
 #include "openvino/runtime/aligned_buffer.hpp"
+#include "openvino/runtime/icore.hpp"
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/runtime/tensor.hpp"
 #include "openvino/util/mmap_object.hpp"
@@ -33,14 +42,19 @@
 namespace ov::intel_cpu {
 
 ModelDeserializer::ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_buffer,
-                                     ModelBuilder fn,
+                                     const std::shared_ptr<ov::ICore>& core,
                                      const CacheDecrypt& decrypt_fn,
                                      bool decript_from_string,
-                                     std::string origin_weights_path)
+                                     const std::string& origin_weights_path)
     : m_model(model_buffer),
-      m_model_builder(std::move(fn)),
-      m_decript_from_string(decript_from_string),
-      m_origin_weights_path(std::move(origin_weights_path)) {
+      m_core(core),
+      m_decript_from_string(decript_from_string) {
+    if (!origin_weights_path.empty() && std::filesystem::exists(origin_weights_path)) {
+        auto mmap = ov::load_mmap_object(origin_weights_path);
+        m_origin_weights_buf =
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
+    }
+
     if (m_decript_from_string) {
         m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
     } else {
@@ -49,14 +63,19 @@ ModelDeserializer::ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_b
 }
 
 ModelDeserializer::ModelDeserializer(std::istream& model_stream,
-                                     ModelBuilder fn,
+                                     const std::shared_ptr<ov::ICore>& core,
                                      const CacheDecrypt& decrypt_fn,
                                      bool decript_from_string,
-                                     std::string origin_weights_path)
+                                     const std::string& origin_weights_path)
     : m_model(model_stream),
-      m_model_builder(std::move(fn)),
-      m_decript_from_string(decript_from_string),
-      m_origin_weights_path(std::move(origin_weights_path)) {
+      m_core(core),
+      m_decript_from_string(decript_from_string) {
+    if (!origin_weights_path.empty() && std::filesystem::exists(origin_weights_path)) {
+        auto mmap = ov::load_mmap_object(origin_weights_path);
+        m_origin_weights_buf =
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
+    }
+
     if (m_decript_from_string) {
         m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
     } else {
@@ -74,6 +93,53 @@ void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
         m_model);
 }
 
+std::shared_ptr<ov::Model> ModelDeserializer::create_ov_model(
+    const std::shared_ptr<ov::AlignedBuffer>& model_buf,
+    const std::shared_ptr<ov::AlignedBuffer>& weights,
+    const std::shared_ptr<ov::AlignedBuffer>& origin_weights) {
+    // Without an original weights buffer the blob is handed to the core's regular model reader.
+    if (!origin_weights) {
+        return m_core->read_model(model_buf, weights);
+    }
+
+    // Weightless mode: parse the IR XML here and let XmlDeserializer build the model.
+    pugi::xml_document ir_doc;
+    const auto parsed =
+        ir_doc.load_buffer(model_buf->get_ptr(), model_buf->size(), pugi::parse_default, pugi::encoding_utf8);
+    OPENVINO_ASSERT(parsed.status == pugi::status_ok, parsed.description(), " at offset ", parsed.offset);
+    const auto root = ir_doc.document_element();
+
+    // Instantiate every available opset, keyed by its name.
+    const auto opsets = [] {
+        std::unordered_map<std::string, ov::OpSet> by_name;
+        for (const auto& [name, make_opset] : ov::get_available_opsets()) {
+            by_name[name] = make_opset();
+        }
+        return by_name;
+    }();
+    const auto version = static_cast<size_t>(ov::util::pugixml::get_uint64_attr(root, "version", 0));
+
+    // Keep only the op extensions, indexed by the type info they report.
+    std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> op_extensions;
+    {
+        std::vector<ov::Extension::Ptr> extensions;
+        OV_CREATE_EXTENSION(extensions);
+        for (const auto& ext : extensions) {
+            if (auto base_ext = std::dynamic_pointer_cast<ov::BaseOpExtension>(ext)) {
+                op_extensions.insert({base_ext->get_type_info(), base_ext});
+            }
+        }
+    }
+
+    std::unordered_map<std::string, std::shared_ptr<ov::op::util::Variable>> variables;
+    const auto& ir_weights = (weights && weights->size() != 0) ? weights : origin_weights;
+    XmlDeserializer visitor(root, ir_weights, origin_weights, opsets, op_extensions, variables, version);
+    std::shared_ptr<ov::Model> ov_model;
+    visitor.on_attribute("net", ov_model);
+    ov_model->get_rt_info()["version"] = static_cast<int64_t>(version);
+    return ov_model;
+}
+
 void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
                                       const std::shared_ptr<ov::AlignedBuffer>& model_buffer) {
     // Note: Don't use seekg with mmaped stream. This may affect the performance of some models.
@@ -111,13 +177,6 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
                                                                                    model_buffer);
     }
 
-    std::shared_ptr<ov::AlignedBuffer> origin_weights_buf;
-    if (!m_origin_weights_path.empty()) {
-        auto mmap = ov::load_mmap_object(m_origin_weights_path);
-        origin_weights_buf =
-            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
-    }
-
     // XML content
     auto xml_buff = std::make_shared<std::string>();
     if (m_cache_decrypt) {
@@ -134,7 +193,7 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
     std::shared_ptr<ov::AlignedBuffer> model_buf =
         std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>((*xml_buff).data(), hdr.model_size, xml_buff);
 
-    model = m_model_builder(model_buf, weights_buf, origin_weights_buf);
+    model = create_ov_model(model_buf, weights_buf, m_origin_weights_buf);
 
     // Set Info
     pugi::xml_node root = xml_in_out_doc.child("cnndata");
@@ -180,13 +239,6 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
         model_stream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
     }
 
-    std::shared_ptr<ov::AlignedBuffer> origin_weights_buf;
-    if (!m_origin_weights_path.empty()) {
-        auto mmap = ov::load_mmap_object(m_origin_weights_path);
-        origin_weights_buf =
-            std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
-    }
-
     // read XML content
     auto xml_string = std::make_shared<std::string>();
     model_stream.seekg(hdr.model_offset + hdr_pos);
@@ -211,7 +263,7 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
         hdr.consts_size,
         data_blob);
 
-    model = m_model_builder(model_buf, weights_buf, origin_weights_buf);
+    model = create_ov_model(model_buf, weights_buf, m_origin_weights_buf);
 
     // Set Info
     pugi::xml_node root = xmlInOutDoc.child("cnndata");
@@ -250,49 +302,54 @@ void XmlDeserializer::set_constant_num_buffer(ov::AttributeAdapter<std::shared_p
         is_wlc_way &= !wlc.empty() && wlc.is<ov::WeightlessCacheAttribute>();
     }
 
-    if (is_wlc_way) {
-        const auto& wlc_attribute = wlc.as<ov::WeightlessCacheAttribute>();
-
-        auto actual_size = wlc_attribute.original_size;
-        auto offset = wlc_attribute.bin_offset;
-        auto w_size = m_origin_weights->size();
-        OPENVINO_ASSERT(w_size >= offset + actual_size, "Incorrect weights in bin file!");
-
-        auto original_dtype = wlc_attribute.original_dtype;
-        char* data = m_origin_weights->get_ptr<char>() + offset;
-
-        ov::Shape shape;
-        OPENVINO_ASSERT(getParameters<size_t>(dn, "shape", shape),
-                        "[ CPU ] Could not get attribute 'shape' during weights deserialization.");
-
-        if (original_dtype != target_dtype) {
-            const auto org_tensor = ov::Tensor(original_dtype, shape, data);
-            auto converted_weights =
-                std::make_shared<ov::AlignedBuffer>(ov::util::get_memory_size(target_dtype, ov::shape_size(shape)));
-            auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}};
-            auto convert = op::v0::Convert();
-            OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported");
-            adapter.set(converted_weights);
-        } else {
-            if (actual_size < ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3)) {
-                const auto type = ov::util::pugixml::get_str_attr(get_node(), "type");
-                OPENVINO_THROW("Attribute and shape size are inconsistent for ",
-                               type,
-                               " op!",
-                               actual_size,
-                               ", ",
-                               ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3),
-                               ", ",
-                               ov::util::get_memory_size(target_dtype, ov::shape_size(shape)));
-            }
+    // Not the weightless-cache path: defer to the base-class implementation.
+    if (!is_wlc_way) {
+        ov::util::XmlDeserializer::set_constant_num_buffer(adapter);
+        return;
+    }
 
-            auto buffer = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(data,
-                                                                                                 actual_size,
-                                                                                                 m_origin_weights);
-            adapter.set(buffer);
-        }
+    const auto& wlc_attribute = wlc.as<ov::WeightlessCacheAttribute>();
+
+    auto actual_size = wlc_attribute.original_size;
+    auto offset = wlc_attribute.bin_offset;
+    auto w_size = m_origin_weights->size();
+    // Overflow-safe bounds check: offset + actual_size could wrap around for corrupted attribute values and
+    // wrongly pass a "w_size >= offset + actual_size" comparison.
+    OPENVINO_ASSERT(offset <= w_size && actual_size <= w_size - offset, "Incorrect weights in bin file!");
+
+    auto original_dtype = wlc_attribute.original_dtype;
+    char* data = m_origin_weights->get_ptr<char>() + offset;
+
+    ov::Shape shape;
+    OPENVINO_ASSERT(getParameters<size_t>(dn, "shape", shape),
+                    "[ CPU ] Could not get attribute 'shape' during weights deserialization.");
+
+    // original_dtype differs from target_dtype: convert into a newly allocated buffer.
+    if (original_dtype != target_dtype) {
+        const auto org_tensor = ov::Tensor(original_dtype, shape, data);
+        auto converted_weights =
+            std::make_shared<ov::AlignedBuffer>(ov::util::get_memory_size(target_dtype, ov::shape_size(shape)));
+        auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}};
+        auto convert = op::v0::Convert();
+        OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported");
+        adapter.set(converted_weights);
     } else {
-        ov::util::XmlDeserializer::set_constant_num_buffer(adapter);
+        if (actual_size < ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3)) {
+            const auto type = ov::util::pugixml::get_str_attr(get_node(), "type");
+            OPENVINO_THROW("Attribute and shape size are inconsistent for ",
+                           type,
+                           " op!",
+                           actual_size,
+                           ", ",
+                           ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3),
+                           ", ",
+                           ov::util::get_memory_size(target_dtype, ov::shape_size(shape)));
+        }
+
+        // Same dtype: wrap the region of m_origin_weights in a SharedBuffer that holds on to m_origin_weights.
+        auto buffer =
+            std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(data, actual_size, m_origin_weights);
+        adapter.set(buffer);
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
index 9bc9c7a445c366..67704c868ac23a 100644
--- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
+++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp
@@ -15,6 +15,9 @@
 #include "openvino/util/xml_parse_utils.hpp"
 #include "utils/codec_xor.hpp"
 
+namespace ov {
+class ICore;  // forward declaration; referenced below as std::shared_ptr<ov::ICore>
+}  // namespace ov
 namespace ov::intel_cpu {
 
 template <class T>
@@ -70,21 +73,17 @@ class XmlDeserializer : public ov::util::XmlDeserializer {
 
 class ModelDeserializer {
 public:
-    using ModelBuilder = std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
-                                                                  const std::shared_ptr<ov::AlignedBuffer>&,
-                                                                  const std::shared_ptr<ov::AlignedBuffer>&)>;
-
     ModelDeserializer(std::shared_ptr<ov::AlignedBuffer>& model_buffer,
-                      ModelBuilder fn,
+                      const std::shared_ptr<ov::ICore>& core,
                       const CacheDecrypt& decrypt_fn,
                       bool decript_from_string,
-                      std::string origin_weights_path = "");
+                      const std::string& origin_weights_path = "");
 
     ModelDeserializer(std::istream& model_stream,
-                      ModelBuilder fn,
+                      const std::shared_ptr<ov::ICore>& core,
                       const CacheDecrypt& decrypt_fn,
                       bool decript_from_string,
-                      std::string origin_weights_path = "");
+                      const std::string& origin_weights_path = "");
 
     virtual ~ModelDeserializer() = default;
 
@@ -94,13 +93,18 @@ class ModelDeserializer {
     static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);
 
     void process_model(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& model_buffer);
+
     void process_model(std::shared_ptr<ov::Model>& model, std::reference_wrapper<std::istream> model_stream);
 
+    std::shared_ptr<ov::Model> create_ov_model(const std::shared_ptr<ov::AlignedBuffer>& model,
+                                               const std::shared_ptr<ov::AlignedBuffer>& weights,
+                                               const std::shared_ptr<ov::AlignedBuffer>& origin_weights);
+
     std::variant<std::shared_ptr<ov::AlignedBuffer>, std::reference_wrapper<std::istream>> m_model;
-    ModelBuilder m_model_builder;
+    std::shared_ptr<ov::ICore> m_core;
     CacheDecrypt m_cache_decrypt;
     bool m_decript_from_string;
-    std::string m_origin_weights_path;
+    std::shared_ptr<ov::AlignedBuffer> m_origin_weights_buf;  // passed to create_ov_model() as origin_weights
 };
 
 }  //  namespace ov::intel_cpu