From 63f723ea09272b57168c6ef5ff3c86f01b930692 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Mon, 15 Sep 2025 17:13:28 +0400 Subject: [PATCH 1/3] [CPU] Weightless cache support --- samples/cpp/benchmark_app/main.cpp | 5 + ...k_subgraphs_to_keep_in_mixed_precision.cpp | 1 - src/core/CMakeLists.txt | 4 +- .../rt_info/weightless_caching_attributes.hpp | 2 + .../openvino/xml_util/xml_serialize_util.hpp | 4 +- .../op/util/weightless_caching_attributes.cpp | 9 +- src/core/src/xml_util/xml_serialize_util.cpp | 6 +- .../xml_util/xml_deserialize_util.hpp | 3 + src/inference/src/dev/core_impl.cpp | 22 +- src/plugins/intel_cpu/CMakeLists.txt | 4 +- src/plugins/intel_cpu/src/compiled_model.cpp | 10 +- src/plugins/intel_cpu/src/config.cpp | 9 +- src/plugins/intel_cpu/src/config.h | 2 + src/plugins/intel_cpu/src/plugin.cpp | 208 ++++++++++++++---- src/plugins/intel_cpu/src/plugin.h | 2 +- .../deserializer.cpp} | 142 +++++++++--- .../utils/graph_serializer/deserializer.hpp | 119 ++++++++++ .../src/utils/graph_serializer/serializer.cpp | 181 +++++++++++++++ .../src/utils/graph_serializer/serializer.hpp | 39 ++++ src/plugins/intel_cpu/src/utils/serialize.hpp | 64 ------ .../ov_executable_network/properties.cpp | 2 +- .../custom/behavior/ov_plugin/properties.cpp | 5 + .../behavior/compiled_model/model_cache.cpp | 26 +++ .../skip_tests_config.cpp | 2 + .../intel_cpu/tests/unit/CMakeLists.txt | 1 + .../tests/unit/vectorized/CMakeLists.txt | 1 + .../behavior/compiled_model/model_cache.hpp | 56 +++++ .../behavior/compiled_model/model_cache.cpp | 190 ++++++++++++++++ 28 files changed, 954 insertions(+), 165 deletions(-) rename src/plugins/intel_cpu/src/utils/{serialize.cpp => graph_serializer/deserializer.cpp} (58%) create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp create mode 100644 src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp delete mode 100644 src/plugins/intel_cpu/src/utils/serialize.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp create mode 100644 src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp create mode 100644 src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 3ae50e13d7433e..928ed621d4c736 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -606,6 +606,11 @@ int main(int argc, char* argv[]) { if (is_virtual_device(device)) { device_nstreams.erase(device); } + + if (!FLAGS_cache_dir.empty()) { + // Choose between better model compilation time and cache file size. + device_config[ov::cache_mode.name()] = ov::CacheMode::OPTIMIZE_SPEED; + } } auto result = std::find_if(config.begin(), config.end(), [&](const std::pair& item) { return device_name.find(item.first) == 0; diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index 27c137a1731fe1..f0a091b65219f7 100644 --- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -50,7 +50,6 @@ #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp" -#include "transformations/convert_precision.hpp" #include "transformations/fp16_compression/mark_floatpoint_range.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/utils/utils.hpp" diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1dadebdf3e66d1..3443b20c5a77e0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -122,9 +122,7 @@ endif() # some sources are located in openvino_core, while headers are in openvino_transformations file(GLOB_RECURSE smart_reshape_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/smart_reshape/*.cpp) file(GLOB_RECURSE rt_info_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/rt_info/*.cpp) -set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_precision.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/init_node_info.cpp" +set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/pass/serialize.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/op/type_relaxed.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/preprocess/preprocess_steps_impl.cpp" diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp index f5c7d3446dbcb6..7e5ea18bbcb853 100644 --- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -35,6 +35,8 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { bool is_copyable() const override; + bool visit_attributes(AttributeVisitor& visitor) override; + size_t original_size; size_t bin_offset; ov::element::Type original_dtype; diff --git a/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp b/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp index c847b61bd3a96d..ff72ad9b35d56b 100644 --- a/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp +++ b/src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp @@ -76,9 +76,7 @@ class OPENVINO_API XmlSerializer : public ov::AttributeVisitor { virtual void append_rt_info(pugi::xml_node& node, ov::RTMap& attributes); virtual bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute); virtual bool append_node_attributes(ov::Node& node); - virtual util::ConstantWriter& get_constant_write_handler() const { - return m_constant_node_write_handler; - } + virtual util::ConstantWriter& get_constant_write_handler(); public: XmlSerializer(pugi::xml_node& data, diff --git a/src/core/src/op/util/weightless_caching_attributes.cpp b/src/core/src/op/util/weightless_caching_attributes.cpp index 4e595e475226d1..6deb130709d5c7 100644 --- a/src/core/src/op/util/weightless_caching_attributes.cpp +++ b/src/core/src/op/util/weightless_caching_attributes.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -11,6 +11,13 @@ bool ov::WeightlessCacheAttribute::is_copyable() const { return false; } +bool ov::WeightlessCacheAttribute::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("original_dtype", original_dtype); + visitor.on_attribute("bin_offset", bin_offset); + visitor.on_attribute("original_size", original_size); + return true; +} + OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr& from, const std::shared_ptr& to) { const auto& rt_info = from->get_rt_info(); diff --git a/src/core/src/xml_util/xml_serialize_util.cpp b/src/core/src/xml_util/xml_serialize_util.cpp index 96a8c1772344a0..97af95cf099a2d 100644 --- a/src/core/src/xml_util/xml_serialize_util.cpp +++ b/src/core/src/xml_util/xml_serialize_util.cpp @@ -1061,9 +1061,9 @@ bool XmlSerializer::append_node_attributes(ov::Node& node) { return node.visit_attributes(*this); } -// util::ConstantWriter& XmlSerializer::get_constant_write_handler() { -// return m_constant_node_write_handler.get(); -// } +util::ConstantWriter& XmlSerializer::get_constant_write_handler() { + return m_constant_node_write_handler.get(); +} std::string get_ir_precision_name(const element::Type& precision) { switch (precision) { diff --git a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp index d7d9ea02b3dd28..653810b0ca3eaf 100644 --- a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp +++ b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp @@ -53,6 +53,9 @@ class XmlDeserializer : public ov::AttributeVisitor { virtual void set_constant_num_buffer(ov::AttributeAdapter>& adapter); const pugi::xml_node& get_node() const; + const std::shared_ptr& get_weights() const { + return m_weights; + } private: struct IoMap { diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index db916280eabfaf..56e918714f7610 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -30,6 +30,7 @@ #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" +#include "openvino/util/log.hpp" #include "openvino/util/shared_object.hpp" #include "openvino/util/variant_visitor.hpp" #include "openvino/util/xml_parse_utils.hpp" @@ -851,6 +852,16 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< const auto compiled_config = create_compile_config(plugin, parsed._config); cache_content.blobId = ModelCache::compute_hash(model, cache_content.modelPath, compiled_config); cache_content.model = model; + + const auto& cache_mode_it = config.find(cache_mode.name()); + if (cache_mode_it != config.end() && cache_mode_it->second == CacheMode::OPTIMIZE_SIZE) { + const auto& rt_info = model->get_rt_info(); + auto weights_path = rt_info.find("__weights_path"); + if (weights_path != rt_info.end()) { + parsed._config[ov::weights_path.name()] = weights_path->second; + } + } + const auto lock = cacheGuard.get_hash_lock(cache_content.blobId); res = load_model_from_cache(cache_content, plugin, parsed._config, {}, [&]() { return compile_model_and_cache(plugin, model, parsed._config, {}, cache_content); @@ -1594,10 +1605,6 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( update_config[ov::hint::model.name()] = cacheContent.model; } - if (util::contains(plugin.get_property(ov::supported_properties), ov::hint::model) && - cacheContent.model) { - update_config[ov::hint::model.name()] = cacheContent.model; - } if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) { util::Path weights_path; @@ -1606,7 +1613,6 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( weights_path = path_hint->second.as(); } else if (weights_path = extract_weight_path(header.get_runtime_info()); weights_path.empty()) { weights_path = cacheContent.modelPath; - weights_path.replace_extension(".bin"); } weights_path.replace_extension(".bin"); @@ -1638,9 +1644,11 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( // throw; } - // fallback scenario - if (!compiled_model) + // Fallback scenario + if (!compiled_model) { + OPENVINO_WARN("Could not load model from cache."); compiled_model = compile_model_lambda(); + } return compiled_model; } diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index f0bcf4b7287285..d067bb58ed08ff 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -261,7 +261,8 @@ ov_mark_target_as_cc(${TARGET_NAME}) target_link_libraries(${TARGET_NAME} PRIVATE dnnl openvino::shape_inference - openvino::snippets) + openvino::snippets + openvino_xml_util) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) if (ENABLE_MLAS_FOR_CPU) @@ -397,6 +398,7 @@ if(BUILD_SHARED_LIBS) $ $ $ + $ PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src $) diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 2940d6ad41382a..82188856c853d7 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -36,8 +36,8 @@ #include "sub_memory_manager.hpp" #include "utils/debug_capabilities.h" #include "utils/general_utils.h" +#include "utils/graph_serializer/serializer.hpp" #include "utils/memory_stats_dump.hpp" -#include "utils/serialize.hpp" #if defined(OV_CPU_WITH_ACL) # include @@ -303,8 +303,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::key_cache_precision.name()), RO_property(ov::value_cache_precision.name()), RO_property(ov::key_cache_group_size.name()), - RO_property(ov::value_cache_group_size.name()), - }; + RO_property(ov::value_cache_group_size.name())}; return ro_properties; } @@ -400,11 +399,14 @@ ov::Any CompiledModel::get_property(const std::string& name) const { if (name == ov::value_cache_group_size) { return static_cast(config.valueCacheGroupSize); } + if (name == ov::weights_path) { + return static_cast(""); + } OPENVINO_THROW("Unsupported property: ", name); } void CompiledModel::export_model(std::ostream& modelStream) const { - ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt); + ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt, m_cfg.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE); serializer << m_model; } diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 4de2b7f7133cad..dce25d5f24f080 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -446,7 +446,14 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } catch (ov::Exception&) { OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); } - } else if (key == ov::internal::caching_with_mmap.name()) { + } else if (key == ov::cache_mode.name()) { + try { + m_cache_mode = val.as(); + } catch (...) { + OPENVINO_THROW("Wrong value for property key ", ov::cache_mode.name()); + } + } else if (key == ov::hint::model.name() || key == ov::internal::caching_with_mmap.name() || + key == ov::weights_path.name()) { } else if (key == ov::intel_cpu::enable_sage_attn.name()) { try { enableSageAttn = val.as(); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 3d3337ea7e136f..68ea781a204c34 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -138,6 +138,8 @@ struct Config { std::function cacheEncrypt; std::function cacheDecrypt; + ov::CacheMode m_cache_mode = ov::CacheMode::OPTIMIZE_SPEED; + #ifdef CPU_DEBUG_CAPS DebugCapsConfig debugCaps; void applyDebugCapsProperties(); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 57cb1737b3a3ab..226c198851e29a 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -49,14 +49,16 @@ #include "openvino/runtime/threading/cpu_message.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/runtime/threading/istreams_executor.hpp" +#include "openvino/util/xml_parse_utils.hpp" #include "sigstack_manager.h" #include "transformations/transformation_pipeline.h" #include "transformations/utils/utils.hpp" #include "utils/codec_xor.hpp" #include "utils/debug_capabilities.h" #include "utils/denormals.hpp" +#include "utils/graph_serializer/deserializer.hpp" +#include "utils/graph_serializer/serializer.hpp" #include "utils/precision_support.h" -#include "utils/serialize.hpp" #include "weights_cache.hpp" #include "xbyak/xbyak_util.h" @@ -511,6 +513,10 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return decltype(ov::value_cache_group_size)::value_type(engConfig.valueCacheGroupSize); } + if (name == ov::weights_path) { + return decltype(ov::weights_path)::value_type(std::string("")); + } + return get_ro_property(name, options); } @@ -521,6 +527,9 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const auto RW_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RW); }; + auto WO_property = [](const std::string& propertyName) { + return ov::PropertyName(propertyName, ov::PropertyMutability::WO); + }; if (name == ov::supported_properties) { std::vector roProperties{ @@ -535,36 +544,37 @@ ov::Any Plugin::get_ro_property(const std::string& name, [[maybe_unused]] const RO_property(ov::device::architecture.name()), }; // the whole config is RW before model is loaded. - std::vector rwProperties{ - RW_property(ov::num_streams.name()), - RW_property(ov::inference_num_threads.name()), - RW_property(ov::enable_profiling.name()), - RW_property(ov::hint::inference_precision.name()), - RW_property(ov::hint::performance_mode.name()), - RW_property(ov::hint::execution_mode.name()), - RW_property(ov::hint::num_requests.name()), - RW_property(ov::hint::enable_cpu_pinning.name()), - RW_property(ov::hint::enable_cpu_reservation.name()), - RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::model_distribution_policy.name()), - RW_property(ov::hint::enable_hyper_threading.name()), - RW_property(ov::device::id.name()), - RW_property(ov::intel_cpu::denormals_optimization.name()), - RW_property(ov::log::level.name()), - RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()), - RW_property(ov::intel_cpu::enable_tensor_parallel.name()), - RW_property(ov::hint::dynamic_quantization_group_size.name()), - RW_property(ov::hint::kv_cache_precision.name()), - RW_property(ov::key_cache_precision.name()), - RW_property(ov::value_cache_precision.name()), - RW_property(ov::key_cache_group_size.name()), - RW_property(ov::value_cache_group_size.name()), - }; + std::vector rwProperties{RW_property(ov::num_streams.name()), + RW_property(ov::inference_num_threads.name()), + RW_property(ov::enable_profiling.name()), + RW_property(ov::hint::inference_precision.name()), + RW_property(ov::hint::performance_mode.name()), + RW_property(ov::hint::execution_mode.name()), + RW_property(ov::hint::num_requests.name()), + RW_property(ov::hint::enable_cpu_pinning.name()), + RW_property(ov::hint::enable_cpu_reservation.name()), + RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::model_distribution_policy.name()), + RW_property(ov::hint::enable_hyper_threading.name()), + RW_property(ov::device::id.name()), + RW_property(ov::intel_cpu::denormals_optimization.name()), + RW_property(ov::log::level.name()), + RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()), + RW_property(ov::intel_cpu::enable_tensor_parallel.name()), + RW_property(ov::hint::dynamic_quantization_group_size.name()), + RW_property(ov::hint::kv_cache_precision.name()), + RW_property(ov::key_cache_precision.name()), + RW_property(ov::value_cache_precision.name()), + RW_property(ov::key_cache_group_size.name()), + RW_property(ov::value_cache_group_size.name())}; + + std::vector wo_properties{WO_property(ov::weights_path.name())}; std::vector supportedProperties; - supportedProperties.reserve(roProperties.size() + rwProperties.size()); + supportedProperties.reserve(roProperties.size() + rwProperties.size() + wo_properties.size()); supportedProperties.insert(supportedProperties.end(), roProperties.begin(), roProperties.end()); supportedProperties.insert(supportedProperties.end(), rwProperties.begin(), rwProperties.end()); + supportedProperties.insert(supportedProperties.end(), wo_properties.begin(), wo_properties.end()); return decltype(ov::supported_properties)::value_type(std::move(supportedProperties)); } @@ -691,24 +701,94 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); +std::string get_origin_weights_path(const ov::AnyMap& config) { + ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED; + std::string origin_weights_path; + + auto cm_it = config.find(ov::cache_mode.name()); + if (cm_it != config.end()) { + cache_mode = cm_it->second.as(); + if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE) { + auto wp_it = config.find(ov::weights_path.name()); + if (wp_it != config.end()) { + origin_weights_path = wp_it->second.as(); + } + } + } + + return origin_weights_path; +} + +bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) { + bool decrypt_from_string = false; - CacheDecrypt decrypt{codec_xor}; - bool decript_from_string = false; if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) { const auto& encryption_callbacks = it->second.as(); decrypt.m_decrypt_str = encryption_callbacks.decrypt; - decript_from_string = true; + decrypt_from_string = true; } + return decrypt_from_string; +} + +std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); + + CacheDecrypt decrypt{codec_xor}; + auto decrypt_from_string = get_cache_decrypt_fn(config, decrypt); + const auto origin_weights_path = get_origin_weights_path(config); + ModelDeserializer deserializer( model_stream, - [this](const std::shared_ptr& model, const std::shared_ptr& weights) { - return get_core()->read_model(model, weights); + [this]( + const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights) { + if (origin_weights == nullptr) { + return get_core()->read_model(model, weights); + } else { + // Custom deserialization for weightless mode + + pugi::xml_document xml_doc; + const auto root = [&] { + auto res = + xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + return xml_doc.document_element(); + }(); + const auto opsets = [] { + std::unordered_map opsets; + for (const auto& [name, mk_opset] : ov::get_available_opsets()) { + opsets[name] = mk_opset(); + } + return opsets; + }(); + const auto version = static_cast(ov::util::pugixml::get_uint64_attr(root, "version", 0)); + + auto create_extensions_map = + [&]() -> std::unordered_map { + std::unordered_map exts; + std::vector m_extensions; + OV_CREATE_EXTENSION(m_extensions); + for (const auto& ext : m_extensions) { + if (auto base_ext = std::dynamic_pointer_cast(ext)) + exts.insert({base_ext->get_type_info(), base_ext}); + } + return exts; + }(); + + std::unordered_map> variables; + const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights; + XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version); + std::shared_ptr model; + visitor.on_attribute("net", model); + model->get_rt_info()["version"] = int64_t(version); + return model; + } }, decrypt, - decript_from_string); + decrypt_from_string, + origin_weights_path); return deserialize_model(deserializer, config); } @@ -718,12 +798,8 @@ std::shared_ptr Plugin::import_model(const ov::Tensor& model OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); CacheDecrypt decrypt{codec_xor}; - bool decript_from_string = false; - if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) { - const auto& encryption_callbacks = it->second.as(); - decrypt.m_decrypt_str = encryption_callbacks.decrypt; - decript_from_string = true; - } + auto decrypt_from_string = get_cache_decrypt_fn(config, decrypt); + const auto origin_weights_path = get_origin_weights_path(config); std::shared_ptr model_buffer = std::make_shared>(reinterpret_cast(model_tensor.data()), @@ -732,11 +808,55 @@ std::shared_ptr Plugin::import_model(const ov::Tensor& model ModelDeserializer deserializer( model_buffer, - [this](const std::shared_ptr& model, const std::shared_ptr& weights) { - return get_core()->read_model(model, weights); + [this]( + const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights) { + if (origin_weights == nullptr) { + return get_core()->read_model(model, weights); + } else { + // Custom deserialization for weightless mode + + pugi::xml_document xml_doc; + const auto root = [&] { + auto res = + xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + return xml_doc.document_element(); + }(); + const auto opsets = [] { + std::unordered_map opsets; + for (const auto& [name, mk_opset] : ov::get_available_opsets()) { + opsets[name] = mk_opset(); + } + return opsets; + }(); + const auto version = static_cast(ov::util::pugixml::get_uint64_attr(root, "version", 0)); + + auto create_extensions_map = + [&]() -> std::unordered_map { + std::unordered_map exts; + std::vector m_extensions; + OV_CREATE_EXTENSION(m_extensions); + for (const auto& ext : m_extensions) { + if (auto base_ext = std::dynamic_pointer_cast(ext)) + exts.insert({base_ext->get_type_info(), base_ext}); + } + return exts; + }(); + + std::unordered_map> variables; + const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights; + XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version); + std::shared_ptr model; + visitor.on_attribute("net", model); + model->get_rt_info()["version"] = int64_t(version); + return model; + } }, decrypt, - decript_from_string); + decrypt_from_string, + origin_weights_path); return deserialize_model(deserializer, config); } diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index bf4a6fb87254af..e138460500428a 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -18,7 +18,7 @@ #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/so_ptr.hpp" #include "openvino/runtime/threading/cpu_message.hpp" -#include "utils/serialize.hpp" +#include "utils/graph_serializer/deserializer.hpp" namespace ov::intel_cpu { diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp similarity index 58% rename from src/plugins/intel_cpu/src/utils/serialize.cpp rename to src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp index aa60590beeaf9e..8a45c184b129bc 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp @@ -2,60 +2,45 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "serialize.hpp" +#include "deserializer.hpp" #include #include #include #include #include -#include #include #include #include +#include "openvino/core/any.hpp" #include "openvino/core/except.hpp" +#include "openvino/core/memory_util.hpp" #include "openvino/core/model.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/core/shape.hpp" #include "openvino/core/type/element_type.hpp" +#include "openvino/op/convert.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/tensor.hpp" +#include "openvino/util/mmap_object.hpp" +#include "openvino/util/xml_parse_utils.hpp" +#include "openvino/xml_util/xml_deserialize_util.hpp" #include "utils/codec_xor.hpp" namespace ov::intel_cpu { -////////// ModelSerializer ////////// - -ModelSerializer::ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn) - : ov::pass::StreamSerialize( - ostream, - [](std::ostream& stream) { - pugi::xml_document xml_doc; - pugi::xml_node root = xml_doc.append_child("cnndata"); - root.append_child("outputs"); - xml_doc.save(stream); - }, - encrypt_fn) {}; - -void ModelSerializer::operator<<(const std::shared_ptr& model) { - run_on_model(std::const_pointer_cast(model->clone())); -} - -bool ModelSerializer::use_absolute_offset() { - return false; -} - -////////// ModelDeserializer ////////// - ModelDeserializer::ModelDeserializer(std::shared_ptr& model_buffer, ModelBuilder fn, const CacheDecrypt& decrypt_fn, - bool decript_from_string) + bool decript_from_string, + std::string origin_weights_path) : m_model(model_buffer), m_model_builder(std::move(fn)), - m_decript_from_string(decript_from_string) { + m_decript_from_string(decript_from_string), + m_origin_weights_path(std::move(origin_weights_path)) { if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -66,10 +51,12 @@ ModelDeserializer::ModelDeserializer(std::shared_ptr& model_b ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, - bool decript_from_string) + bool decript_from_string, + std::string origin_weights_path) : m_model(model_stream), m_model_builder(std::move(fn)), - m_decript_from_string(decript_from_string) { + m_decript_from_string(decript_from_string), + m_origin_weights_path(std::move(origin_weights_path)) { if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -124,6 +111,13 @@ void ModelDeserializer::process_model(std::shared_ptr& model, model_buffer); } + std::shared_ptr origin_weights_buf; + if (!m_origin_weights_path.empty()) { + auto mmap = ov::load_mmap_object(m_origin_weights_path); + origin_weights_buf = + std::make_shared>>(mmap->data(), mmap->size(), mmap); + } + // XML content auto xml_buff = std::make_shared(); if (m_cache_decrypt) { @@ -140,7 +134,7 @@ void ModelDeserializer::process_model(std::shared_ptr& model, std::shared_ptr model_buf = std::make_shared>>((*xml_buff).data(), hdr.model_size, xml_buff); - model = m_model_builder(model_buf, weights_buf); + model = m_model_builder(model_buf, weights_buf, origin_weights_buf); // Set Info pugi::xml_node root = xml_in_out_doc.child("cnndata"); @@ -186,6 +180,13 @@ void ModelDeserializer::process_model(std::shared_ptr& model, model_stream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } + std::shared_ptr origin_weights_buf; + if (!m_origin_weights_path.empty()) { + auto mmap = ov::load_mmap_object(m_origin_weights_path); + origin_weights_buf = + std::make_shared>>(mmap->data(), mmap->size(), mmap); + } + // read XML content auto xml_string = std::make_shared(); model_stream.seekg(hdr.model_offset + hdr_pos); @@ -210,10 +211,89 @@ void ModelDeserializer::process_model(std::shared_ptr& model, hdr.consts_size, data_blob); - model = m_model_builder(model_buf, weights_buf); + model = m_model_builder(model_buf, weights_buf, origin_weights_buf); // Set Info pugi::xml_node root = xmlInOutDoc.child("cnndata"); set_info(root, model); }; + +ov::Any XmlDeserializer::parse_weightless_cache_attribute(const pugi::xml_node& node) const { + if (auto rt_info = node.child("rt_info")) { + for (const auto& child : rt_info.children()) { + for (const auto& attr : child.attributes()) { + if (strcmp(attr.name(), "name") == 0 && + strcmp(attr.value(), ov::WeightlessCacheAttribute::get_type_info_static().name) == 0) { + const auto origin_size = static_cast(ov::util::pugixml::get_uint64_attr(child, "size")); + const auto offset = static_cast(ov::util::pugixml::get_uint64_attr(child, "offset")); + const ov::element::Type original_dt(child.attribute("type").value()); // "element_type"? + return {ov::WeightlessCacheAttribute{origin_size, offset, original_dt}}; + } + } + } + } + return {}; +} + +void XmlDeserializer::set_constant_num_buffer(ov::AttributeAdapter>& adapter) { + OPENVINO_ASSERT(get_weights() != nullptr || m_origin_weights != nullptr, + "Empty weights data in bin file or bin file cannot be found!"); + const auto& node = get_node(); + const auto dn = node.child("data"); + const element::Type target_dtype{ov::util::pugixml::get_str_attr(dn, "element_type")}; + + // wlc -> weightless cache + bool is_wlc_way = target_dtype != element::string && m_origin_weights != nullptr; + ov::Any wlc; + if (is_wlc_way) { + wlc = parse_weightless_cache_attribute(node); + is_wlc_way &= !wlc.empty() && wlc.is(); + } + + if (is_wlc_way) { + const auto& wlc_attribute = wlc.as(); + + auto actual_size = wlc_attribute.original_size; + auto offset = wlc_attribute.bin_offset; + auto w_size = m_origin_weights->size(); + OPENVINO_ASSERT(w_size >= offset + actual_size, "Incorrect weights in bin file!"); + + auto original_dtype = wlc_attribute.original_dtype; + char* data = m_origin_weights->get_ptr() + offset; + + ov::Shape shape; + OPENVINO_ASSERT(getParameters(dn, "shape", shape), + "[ CPU ] Could not get attribute 'shape' during weights deserialization."); + + if (original_dtype != target_dtype) { + const auto org_tensor = ov::Tensor(original_dtype, shape, data); + auto converted_weights = + std::make_shared(ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); + auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}}; + auto convert = op::v0::Convert(); + OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported"); + adapter.set(converted_weights); + } else { + if (actual_size < ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3)) { + const auto type = ov::util::pugixml::get_str_attr(get_node(), "type"); + OPENVINO_THROW("Attribute and shape size are inconsistent for ", + type, + " op!", + actual_size, + ", ", + ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3), + ", ", + ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); + } + + auto buffer = std::make_shared>>(data, + actual_size, + m_origin_weights); + adapter.set(buffer); + } + } else { + ov::util::XmlDeserializer::set_constant_num_buffer(adapter); + } +} + } // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp new file mode 100644 index 00000000000000..93476c24e5b6fc --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "../xml_util/include/openvino/xml_util/xml_deserialize_util.hpp" +#include "openvino/core/model.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/util/xml_parse_utils.hpp" +#include "utils/codec_xor.hpp" + +namespace ov::intel_cpu { + +template +void str_to_container(const std::string& value, T& res) { + std::stringstream ss(value); + std::string field; + while (getline(ss, field, ',')) { + OPENVINO_ASSERT(!field.empty(), "Cannot get vector of parameters! \"", value, "\" is incorrect"); + std::stringstream fs(field); + typename T::value_type val; + fs >> val; + res.insert(res.end(), val); + } +} + +template +bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector& value) { + str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value); + return true; +} + +class XmlDeserializer : public ov::util::XmlDeserializer { +public: + explicit XmlDeserializer(const pugi::xml_node& node, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights, + const std::unordered_map& opsets, + const std::unordered_map& extensions, + std::unordered_map>& variables, + size_t version) + : ov::util::XmlDeserializer(node, weights, opsets, extensions, variables, version), + m_origin_weights{origin_weights} {} + + explicit XmlDeserializer(const pugi::xml_node& node, + const std::shared_ptr& weights, + const std::unordered_map& opsets, + const std::unordered_map& extensions, + std::unordered_map>& variables, + size_t version) + : XmlDeserializer(node, weights, nullptr, opsets, extensions, variables, version) {} + +protected: + ov::Any parse_weightless_cache_attribute(const pugi::xml_node& node) const override; + + void set_constant_num_buffer(ov::AttributeAdapter>& adapter) override; + +private: + std::unique_ptr make_visitor( + const pugi::xml_node& node, + const std::shared_ptr& weights, + const std::unordered_map& opsets, + const std::unordered_map& extensions, + std::unordered_map>& variables, + size_t version) const override { + return std::make_unique(node, + weights, + m_origin_weights, + opsets, + extensions, + variables, + version); + } + + std::shared_ptr m_origin_weights; +}; + +class ModelDeserializer { +public: + using ModelBuilder = std::function(const std::shared_ptr&, + const std::shared_ptr&, + const std::shared_ptr&)>; + + ModelDeserializer(std::shared_ptr& model_buffer, + ModelBuilder fn, + const CacheDecrypt& decrypt_fn, + bool decript_from_string, + std::string origin_weights_path = ""); + + ModelDeserializer(std::istream& model_stream, + ModelBuilder fn, + const CacheDecrypt& decrypt_fn, + bool decript_from_string, + std::string origin_weights_path = ""); + + virtual ~ModelDeserializer() = default; + + void operator>>(std::shared_ptr& model); + +protected: + static void set_info(pugi::xml_node& root, std::shared_ptr& model); + + void process_model(std::shared_ptr& model, const std::shared_ptr& model_buffer); + void process_model(std::shared_ptr& model, std::reference_wrapper model_stream); + + std::variant, std::reference_wrapper> m_model; + ModelBuilder m_model_builder; + CacheDecrypt m_cache_decrypt; + bool m_decript_from_string; + std::string m_origin_weights_path; +}; + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp new file mode 100644 index 00000000000000..114991ec59ea78 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "serializer.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "openvino/core/model.hpp" +#include "openvino/core/node.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/core/runtime_attribute.hpp" +#include "openvino/core/type.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/pass/serialize.hpp" +#include "openvino/xml_util/constant_writer.hpp" +#include "openvino/xml_util/xml_serialize_util.hpp" + +namespace ov::intel_cpu { + +class WeightlessWriter : public util::ConstantWriter { +public: + explicit WeightlessWriter(util::ConstantWriter& other) : util::ConstantWriter(other), m_offset{} {} + + WeightlessWriter(std::ostream& bin_file) : util::ConstantWriter(bin_file), m_offset{} {} + + WeightlessWriter::FilePosition write([[maybe_unused]] const char* ptr, + size_t size, + size_t& new_size, + [[maybe_unused]] bool compress_to_fp16, + [[maybe_unused]] ov::element::Type src_type, + [[maybe_unused]] bool ptr_is_temporary) override { + WeightlessWriter::FilePosition offset = 0L; + + if (m_skip_weights) { + new_size = 0LU; + offset = m_offset; + m_offset += size; + } else { + offset = util::ConstantWriter::write(ptr, size, new_size, compress_to_fp16, src_type, ptr_is_temporary); + } + + return offset; + } + + void skip_weights(bool skip_weights) { + m_skip_weights = skip_weights; + } + +private: + WeightlessWriter::FilePosition m_offset; + bool m_skip_weights = false; +}; + +class XmlSerializer : public util::XmlSerializer { +public: + XmlSerializer(pugi::xml_node& data, + const std::string& node_type_name, + util::ConstantWriter& constant_write_handler, + int64_t version, + bool deterministic = false, + bool compress_to_fp16 = false, + ov::element::Type output_element_type = ov::element::dynamic, + bool data_is_temporary = false, + bool wl_mode = false) + : util::XmlSerializer(data, + node_type_name, + constant_write_handler, + version, + deterministic, + compress_to_fp16, + output_element_type, + data_is_temporary), + m_weightless_const_writer(constant_write_handler), + m_weightless_mode(wl_mode) {} + +private: + bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute) override { + bool result = false; + if (const auto* wl_attr = ov::as_type(&attribute)) { + m_weightless_const_writer.skip_weights(true); + + const auto& type_info = attribute.get_type_info(); + node.append_attribute("name").set_value(type_info.name); + node.append_attribute("version").set_value(type_info.get_version().data()); + node.append_attribute("type").set_value(util::get_ir_precision_name(wl_attr->original_dtype).data()); + node.append_attribute("offset").set_value(wl_attr->bin_offset); + node.append_attribute("size").set_value(wl_attr->original_size); + + result = true; + } else { + result = util::XmlSerializer::append_rt_attribute(node, attribute); + } + + return result; + } + + bool append_node_attributes(ov::Node& node) override { + m_weightless_const_writer.skip_weights( + m_weightless_mode && node.get_rt_info().count(ov::WeightlessCacheAttribute::get_type_info_static()) != 0); + + auto result = util::XmlSerializer::append_node_attributes(node); + + return result; + } + + ov::util::ConstantWriter& get_constant_write_handler() override { + return m_weightless_const_writer; + } + + std::unique_ptr make_visitor(pugi::xml_node& data, + const std::string& node_type_name, + util::ConstantWriter& constant_write_handler, + int64_t version, + bool deterministic, + bool compress_to_fp16, + ov::element::Type output_element_type, + bool data_is_temporary) const override { + return std::make_unique(data, + node_type_name, + constant_write_handler, + version, + deterministic, + compress_to_fp16, + output_element_type, + data_is_temporary, + m_weightless_mode); + } + + WeightlessWriter m_weightless_const_writer; + bool m_weightless_mode = false; +}; + +////////// ModelSerializer ////////// + +ModelSerializer::ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn, bool weightless_mode) + : ov::pass::StreamSerialize( + ostream, + [](std::ostream& stream) { + pugi::xml_document xml_doc; + pugi::xml_node root = xml_doc.append_child("cnndata"); + root.append_child("outputs"); + xml_doc.save(stream); + }, + encrypt_fn), + m_weightless_mode(weightless_mode) {}; + +void ModelSerializer::operator<<(const std::shared_ptr& model) { + run_on_model(std::const_pointer_cast(model->clone())); +} + +bool ModelSerializer::use_absolute_offset() { + return false; +} + +std::unique_ptr ModelSerializer::make_serializer(pugi::xml_node& data, + const std::string& node_type_name, + util::ConstantWriter& constant_write_handler, + int64_t version, + bool deterministic, + bool compress_to_fp16, + ov::element::Type output_element_type, + bool data_is_temporary) const { + return std::make_unique(data, + node_type_name, + constant_write_handler, + version, + deterministic, + compress_to_fp16, + output_element_type, + data_is_temporary, + m_weightless_mode); +} + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp new file mode 100644 index 00000000000000..bd10f5c20d0ffd --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/serializer.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "openvino/core/model.hpp" +#include "openvino/pass/serialize.hpp" + +namespace ov::intel_cpu { + +class ModelSerializer : private ov::pass::StreamSerialize { +public: + using CacheEncrypt = std::function; + + explicit ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn = {}, bool weightless_mode = false); + + void operator<<(const std::shared_ptr& model); + +private: + bool use_absolute_offset() override; + + std::unique_ptr make_serializer(pugi::xml_node& data, + const std::string& node_type_name, + util::ConstantWriter& constant_write_handler, + int64_t version, + bool deterministic, + bool compress_to_fp16, + ov::element::Type output_element_type, + bool data_is_temporary) const override; + + bool m_weightless_mode; +}; + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp deleted file mode 100644 index cd6789a5415ed2..00000000000000 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "openvino/core/model.hpp" -#include "openvino/pass/serialize.hpp" -#include "openvino/runtime/aligned_buffer.hpp" -#include "utils/codec_xor.hpp" - -namespace ov::intel_cpu { - -class ModelSerializer : private ov::pass::StreamSerialize { -public: - using CacheEncrypt = std::function; - - explicit ModelSerializer(std::ostream& ostream, const CacheEncrypt& encrypt_fn = {}); - - void operator<<(const std::shared_ptr& model); - -private: - bool use_absolute_offset() override; -}; - -class ModelDeserializer { -public: - using ModelBuilder = std::function(const std::shared_ptr&, - const std::shared_ptr&)>; - - ModelDeserializer(std::shared_ptr& model_buffer, - ModelBuilder fn, - const CacheDecrypt& decrypt_fn, - bool decript_from_string); - - ModelDeserializer(std::istream& model_stream, - ModelBuilder fn, - const CacheDecrypt& decrypt_fn, - bool decript_from_string); - - virtual ~ModelDeserializer() = default; - - void operator>>(std::shared_ptr& model); - -protected: - static void set_info(pugi::xml_node& root, std::shared_ptr& model); - - void process_model(std::shared_ptr& model, const std::shared_ptr& model_buffer); - void process_model(std::shared_ptr& model, std::reference_wrapper model_stream); - - std::variant, std::reference_wrapper> m_model; - ModelBuilder m_model_builder; - CacheDecrypt m_cache_decrypt; - bool m_decript_from_string; -}; - -} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index 30d1a364cf33f0..b78ef0204bb19a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -53,7 +53,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::key_cache_precision.name()), RO_property(ov::value_cache_precision.name()), RO_property(ov::key_cache_group_size.name()), - RO_property(ov::value_cache_group_size.name()), + RO_property(ov::value_cache_group_size.name()) }; ov::Core ie; diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index b909a77c2192ae..bb786f3828becc 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -26,6 +26,9 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { auto RW_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RW); }; + auto WO_property = [](const std::string& propertyName) { + return ov::PropertyName(propertyName, ov::PropertyMutability::WO); + }; std::vector expectedSupportedProperties{ // read only @@ -38,6 +41,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { RO_property(ov::device::capabilities.name()), RO_property(ov::device::type.name()), RO_property(ov::device::architecture.name()), + // Write only + WO_property(ov::weights_path.name()), // read write RW_property(ov::num_streams.name()), RW_property(ov::inference_num_threads.name()), diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp new file mode 100644 index 00000000000000..7ce2c2848519d5 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/model_cache.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/compiled_model/model_cache.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace ov::test::behavior; + +INSTANTIATE_TEST_SUITE_P(smoke_, + WeightlessCacheAccuracy, + ::testing::Combine(::testing::Bool(), + ::testing::Bool(), + ::testing::ValuesIn(inference_modes), + ::testing::ValuesIn(model_dtypes), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + WeightlessCacheAccuracy::get_test_case_name); + +INSTANTIATE_TEST_SUITE_P(smoke_, + WeightlessCacheAccuracyLowPrecision, + ::testing::Combine(::testing::Bool(), + ::testing::Bool(), + ::testing::ValuesIn(inference_modes), + ::testing::ValuesIn(low_precision_dtypes), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + WeightlessCacheAccuracy::get_test_case_name); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 48d3c5dd16b612..fee18f63d451f3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -296,6 +296,7 @@ std::vector disabledTestPatterns() { R"(.*FC_3D_BF16.*MatMulLayerCPUTest.*)", // Issue: 163242 R"(.*bf16.*RNNSequenceCPUTest.*)", + R"(.*WeightlessCacheAccuracy.TiWithLstmCell.*model_dtype=bf16.*)", // Issue: 163250 R"(.*OnnxModelWithExtensionFromDSO.*)", // Issue: 163273 @@ -368,6 +369,7 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_ConcatSDPTransposeByChannelTest.*)"); // Issue: 168490 retVector.emplace_back(R"(.*CPU/CoreThreadingTest.smoke_QueryModel.*)"); + retVector.emplace_back(R"(.*WeightlessCacheAccuracy.*)"); #endif #if defined(OPENVINO_ARCH_ARM) diff --git a/src/plugins/intel_cpu/tests/unit/CMakeLists.txt b/src/plugins/intel_cpu/tests/unit/CMakeLists.txt index 921125d9916d0f..623a3aee798467 100644 --- a/src/plugins/intel_cpu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/unit/CMakeLists.txt @@ -74,6 +74,7 @@ ov_add_test_target( openvino::shape_inference openvino_runtime_s unit_test_utils + openvino_xml_util ov_snippets_models snippets_test_utils ${MLAS_LIBRARY} diff --git a/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt b/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt index 88689b18b97e58..428a257eb5694a 100644 --- a/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/unit/vectorized/CMakeLists.txt @@ -48,6 +48,7 @@ ov_add_test_target( dnnl gmock openvino_runtime_s + openvino_xml_util unit_test_utils ov_snippets_models snippets_test_utils diff --git a/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp b/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp new file mode 100644 index 00000000000000..750393b8ef8cab --- /dev/null +++ b/src/tests/functional/plugin/shared/include/behavior/compiled_model/model_cache.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "openvino/core/type/element_type.hpp" + +namespace ov { +namespace test { +namespace behavior { + +typedef std::tuple WeightlessCacheAccuracyTestParams; + +class WeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface { +public: + static std::string get_test_case_name(const ::testing::TestParamInfo& obj); + +protected: + std::shared_ptr m_model; + std::string m_xml_path; + std::string m_bin_path; + std::string m_cache_path; + std::string m_cache_dir; + const char* m_target_device; + bool m_use_compile_model_api; + bool m_do_encryption; + ov::element::Type m_inference_mode; + ov::element::Type m_model_dtype; + + void SetUp() override; + void TearDown() override; + void run(); +}; + +class WeightlessCacheAccuracyLowPrecision : public WeightlessCacheAccuracy {}; + +static const std::vector inference_modes = { + ov::element::f32, + ov::element::f16, +}; + +static const std::vector model_dtypes = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +static const std::vector low_precision_dtypes = { + ov::element::u8, + ov::element::u4, + ov::element::i4, +}; + +} // namespace behavior +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp b/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp new file mode 100644 index 00000000000000..e8c8ba06de430c --- /dev/null +++ b/src/tests/functional/plugin/shared/src/behavior/compiled_model/model_cache.cpp @@ -0,0 +1,190 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/compiled_model/model_cache.hpp" + +#include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp" +#include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp" +#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp" +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/util/codec_xor.hpp" +#include "shared_test_classes/subgraph/weights_decompression_builders.hpp" + +namespace ov { +namespace test { +namespace behavior { + +std::string WeightlessCacheAccuracy::get_test_case_name(const ::testing::TestParamInfo& obj) { + std::ostringstream result; + + result << "use_compile_model_api=" << utils::bool2str(std::get<0>(obj.param)); + result << "_do_encryption=" << utils::bool2str(std::get<1>(obj.param)); + result << "_inference_mode=" << std::get<2>(obj.param); + result << "_model_dtype=" << std::get<3>(obj.param); + result << "_device=" << std::get<4>(obj.param); + + return result.str(); +} + +void WeightlessCacheAccuracy::SetUp() { + std::string filePrefix = ov::test::utils::generateTestFilePrefix(); + m_xml_path = filePrefix + ".xml"; + m_bin_path = filePrefix + ".bin"; + m_cache_path = filePrefix + ".blob"; + m_cache_dir = filePrefix + "_cache_dir"; + + std::tie(m_use_compile_model_api, m_do_encryption, m_inference_mode, m_model_dtype, m_target_device) = GetParam(); +} + +void WeightlessCacheAccuracy::TearDown() { + std::remove(m_xml_path.c_str()); + std::remove(m_bin_path.c_str()); + std::remove(m_cache_path.c_str()); + + ov::test::utils::removeFilesWithExt(m_cache_dir, "blob"); + ov::test::utils::removeFilesWithExt(m_cache_dir, "cl_cache"); + ov::test::utils::removeDir(m_cache_dir); +} + +void WeightlessCacheAccuracy::run() { + ov::AnyMap config = {ov::cache_dir(m_cache_dir), + ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::hint::inference_precision(m_inference_mode)}; + ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::weights_path(m_bin_path), + ov::hint::inference_precision(m_inference_mode)}; + + if (m_do_encryption) { + ov::EncryptionCallbacks encryption_callbacks; + encryption_callbacks.encrypt = ov::util::codec_xor; + encryption_callbacks.decrypt = ov::util::codec_xor; + config.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + } + auto core = ov::test::utils::PluginCache::get().core(); + ov::pass::Serialize(m_xml_path, m_bin_path).run_on_model(m_model); + + auto compiled_model = core->compile_model(m_xml_path, m_target_device, config); + + if (!m_use_compile_model_api) { + auto ofstr = std::ofstream(m_cache_path, std::ofstream::binary); + compiled_model.export_model(ofstr); + ofstr.close(); + } + + auto get_cache_path = [&]() { + std::string path; + if (m_use_compile_model_api) { + auto blobs = ov::test::utils::listFilesWithExt(m_cache_dir, "blob"); + EXPECT_EQ(blobs.size(), 1); + path = blobs[0]; + } else { + path = m_cache_path; + } + return path; + }; + + auto get_mod_time = [&](const std::string& path) { + struct stat result; + if (stat(path.c_str(), &result) == 0) { + return result.st_mtime; + } + return static_cast(0); + }; + + auto first_cache_path = get_cache_path(); + auto first_mod_time = get_mod_time(first_cache_path); + ASSERT_NE(first_mod_time, static_cast(0)); + + ov::CompiledModel imported_model; + if (m_use_compile_model_api) { + imported_model = core->compile_model(m_xml_path, m_target_device, config); + } else { + auto ifstr = std::ifstream(m_cache_path, std::ifstream::binary); + imported_model = core->import_model(ifstr, m_target_device, config_with_weights_path); + ifstr.close(); + } + + auto second_cache_path = get_cache_path(); + auto second_mod_time = get_mod_time(second_cache_path); + + // Something went wrong if a new cache is created during the second run. + ASSERT_EQ(first_mod_time, second_mod_time); + + auto orig_req = compiled_model.create_infer_request(); + auto new_req = imported_model.create_infer_request(); + + for (size_t param_idx = 0; param_idx < m_model->get_parameters().size(); ++param_idx) { + auto input = m_model->get_parameters().at(param_idx); + auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(), + input->get_shape(), + -100, + 100, + param_idx); + orig_req.set_tensor(input, tensor); + new_req.set_tensor(input, tensor); + } + + orig_req.infer(); + new_req.infer(); + + auto result_vector = m_model->get_results(); + for (auto& res : result_vector) { + auto orig_out = orig_req.get_tensor(res); + auto new_out = new_req.get_tensor(res); + ov::test::utils::compare(orig_out, new_out, m_inference_mode); + } +} + +TEST_P(WeightlessCacheAccuracy, ReadConcatSplitAssign) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, m_model_dtype)); + OV_ASSERT_NO_THROW(run()); +} + +TEST_P(WeightlessCacheAccuracy, SingleConcatWithConstant) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, m_model_dtype)); + OV_ASSERT_NO_THROW(run()); +} + +TEST_P(WeightlessCacheAccuracy, TiWithLstmCell) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + OV_ASSERT_NO_THROW(m_model = ov::test::utils::make_ti_with_lstm_cell(m_model_dtype)); + OV_ASSERT_NO_THROW(run()); +} + +TEST_P(WeightlessCacheAccuracyLowPrecision, MatmulWeightsDecompression) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::test::MatMulDecompressionShapeParams shape_params{{{}, {{1, 4, 16}}}, {1, 16, 32}}; + auto dynShape = shape_params.data_shape.first; + if (dynShape.rank() == 0) { + dynShape = shape_params.data_shape.second.front(); + } + ov::ParameterVector params{std::make_shared(ov::element::f32, dynShape)}; + const auto weights_subgraph = ov::test::initMatMulDecompressionSubgraph(shape_params.weights_shape, + shape_params.decompression_group_size, + ov::element::f32, + m_model_dtype, + ov::element::f32, + ov::element::dynamic, + true, + ov::test::DecompressionType::full, + ov::test::DecompressionType::full, + false); + auto matmul = std::make_shared(params[0], weights_subgraph); + + ov::ResultVector results; + for (const auto& output : matmul->outputs()) { + results.push_back(std::make_shared(output)); + } + m_model = std::make_shared(results, params, "MatmulWeightsDecompression"); + OV_ASSERT_NO_THROW(run()); +} + +} // namespace behavior +} // namespace test +} // namespace ov From 653b16cb8c770b99677227a8d7ce6591458462e6 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Mon, 29 Sep 2025 14:36:13 +0400 Subject: [PATCH 2/3] Fixes as per comments --- .../openvino/xml_util/xml_deserialize_util.hpp | 14 ++++++++++++++ src/core/xml_util/src/xml_deserialize_util.cpp | 16 +--------------- src/plugins/intel_cpu/src/plugin.cpp | 18 ++++++++---------- .../utils/graph_serializer/deserializer.hpp | 15 +-------------- 4 files changed, 24 insertions(+), 39 deletions(-) diff --git a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp index 653810b0ca3eaf..0f4baafa15d86a 100644 --- a/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp +++ b/src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp @@ -21,6 +21,20 @@ namespace ov::util { struct GenericLayerParams; +template +void str_to_container(const std::string& value, T& res) { + std::stringstream ss(value); + std::string field; + while (getline(ss, field, ',')) { + if (field.empty()) + OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect"); + std::stringstream fs(field); + typename T::value_type val; + fs >> val; + res.insert(res.end(), val); + } +} + class XmlDeserializer : public ov::AttributeVisitor { public: explicit XmlDeserializer(const pugi::xml_node& node, diff --git a/src/core/xml_util/src/xml_deserialize_util.cpp b/src/core/xml_util/src/xml_deserialize_util.cpp index 540bde2374bfdf..ad6a2e0b063f4b 100644 --- a/src/core/xml_util/src/xml_deserialize_util.cpp +++ b/src/core/xml_util/src/xml_deserialize_util.cpp @@ -42,26 +42,12 @@ bool getStrAttribute(const pugi::xml_node& node, const std::string& name, std::s return true; } -template -void str_to_container(const std::string& value, T& res) { - std::stringstream ss(value); - std::string field; - while (getline(ss, field, ',')) { - if (field.empty()) - OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect"); - std::stringstream fs(field); - typename T::value_type val; - fs >> val; - res.insert(res.end(), val); - } -} - template bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector& value) { std::string param; if (!getStrAttribute(node, name, param)) return false; - str_to_container(param, value); + ov::util::str_to_container(param, value); return true; } diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 226c198851e29a..39f15f6bfec6d5 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -701,7 +701,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::string get_origin_weights_path(const ov::AnyMap& config) { +static std::string get_origin_weights_path(const ov::AnyMap& config) { ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED; std::string origin_weights_path; @@ -719,7 +719,7 @@ std::string get_origin_weights_path(const ov::AnyMap& config) { return origin_weights_path; } -bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) { +static bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) { bool decrypt_from_string = false; if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) { @@ -740,10 +740,9 @@ std::shared_ptr Plugin::import_model(std::istream& model_str ModelDeserializer deserializer( model_stream, - [this]( - const std::shared_ptr& model, - const std::shared_ptr& weights, - const std::shared_ptr& origin_weights) { + [this](const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights) { if (origin_weights == nullptr) { return get_core()->read_model(model, weights); } else { @@ -808,10 +807,9 @@ std::shared_ptr Plugin::import_model(const ov::Tensor& model ModelDeserializer deserializer( model_buffer, - [this]( - const std::shared_ptr& model, - const std::shared_ptr& weights, - const std::shared_ptr& origin_weights) { + [this](const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights) { if (origin_weights == nullptr) { return get_core()->read_model(model, weights); } else { diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp index 93476c24e5b6fc..9bc9c7a445c366 100644 --- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp @@ -17,22 +17,9 @@ namespace ov::intel_cpu { -template -void str_to_container(const std::string& value, T& res) { - std::stringstream ss(value); - std::string field; - while (getline(ss, field, ',')) { - OPENVINO_ASSERT(!field.empty(), "Cannot get vector of parameters! \"", value, "\" is incorrect"); - std::stringstream fs(field); - typename T::value_type val; - fs >> val; - res.insert(res.end(), val); - } -} - template bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector& value) { - str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value); + ov::util::str_to_container(ov::util::pugixml::get_str_attr(node, name.c_str()), value); return true; } From 46025379ffa2ae4291b58d81940d1acd900c93b3 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Thu, 2 Oct 2025 11:31:49 +0400 Subject: [PATCH 3/3] Fixes as per comments 2 --- src/plugins/intel_cpu/src/plugin.cpp | 110 +---------- .../utils/graph_serializer/deserializer.cpp | 186 +++++++++++------- .../utils/graph_serializer/deserializer.hpp | 24 ++- 3 files changed, 138 insertions(+), 182 deletions(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 39f15f6bfec6d5..e867f84fef18b3 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -720,15 +720,13 @@ static std::string get_origin_weights_path(const ov::AnyMap& config) { } static bool get_cache_decrypt_fn(const ov::AnyMap& config, CacheDecrypt& decrypt) { - bool decrypt_from_string = false; - if (auto it = config.find(ov::cache_encryption_callbacks.name()); it != config.end()) { const auto& encryption_callbacks = it->second.as(); decrypt.m_decrypt_str = encryption_callbacks.decrypt; - decrypt_from_string = true; + return true; + } else { + return false; } - - return decrypt_from_string; } std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { @@ -738,56 +736,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str auto decrypt_from_string = get_cache_decrypt_fn(config, decrypt); const auto origin_weights_path = get_origin_weights_path(config); - ModelDeserializer deserializer( - model_stream, - [this](const std::shared_ptr& model, - const std::shared_ptr& weights, - const std::shared_ptr& origin_weights) { - if (origin_weights == nullptr) { - return get_core()->read_model(model, weights); - } else { - // Custom deserialization for weightless mode - - pugi::xml_document xml_doc; - const auto root = [&] { - auto res = - xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); - OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); - return xml_doc.document_element(); - }(); - const auto opsets = [] { - std::unordered_map opsets; - for (const auto& [name, mk_opset] : ov::get_available_opsets()) { - opsets[name] = mk_opset(); - } - return opsets; - }(); - const auto version = static_cast(ov::util::pugixml::get_uint64_attr(root, "version", 0)); - - auto create_extensions_map = - [&]() -> std::unordered_map { - std::unordered_map exts; - std::vector m_extensions; - OV_CREATE_EXTENSION(m_extensions); - for (const auto& ext : m_extensions) { - if (auto base_ext = std::dynamic_pointer_cast(ext)) - exts.insert({base_ext->get_type_info(), base_ext}); - } - return exts; - }(); - - std::unordered_map> variables; - const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights; - XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version); - std::shared_ptr model; - visitor.on_attribute("net", model); - model->get_rt_info()["version"] = int64_t(version); - return model; - } - }, - decrypt, - decrypt_from_string, - origin_weights_path); + ModelDeserializer deserializer(model_stream, get_core(), decrypt, decrypt_from_string, origin_weights_path); return deserialize_model(deserializer, config); } @@ -805,56 +754,7 @@ std::shared_ptr Plugin::import_model(const ov::Tensor& model model_tensor.get_byte_size(), model_tensor); - ModelDeserializer deserializer( - model_buffer, - [this](const std::shared_ptr& model, - const std::shared_ptr& weights, - const std::shared_ptr& origin_weights) { - if (origin_weights == nullptr) { - return get_core()->read_model(model, weights); - } else { - // Custom deserialization for weightless mode - - pugi::xml_document xml_doc; - const auto root = [&] { - auto res = - xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); - OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); - return xml_doc.document_element(); - }(); - const auto opsets = [] { - std::unordered_map opsets; - for (const auto& [name, mk_opset] : ov::get_available_opsets()) { - opsets[name] = mk_opset(); - } - return opsets; - }(); - const auto version = static_cast(ov::util::pugixml::get_uint64_attr(root, "version", 0)); - - auto create_extensions_map = - [&]() -> std::unordered_map { - std::unordered_map exts; - std::vector m_extensions; - OV_CREATE_EXTENSION(m_extensions); - for (const auto& ext : m_extensions) { - if (auto base_ext = std::dynamic_pointer_cast(ext)) - exts.insert({base_ext->get_type_info(), base_ext}); - } - return exts; - }(); - - std::unordered_map> variables; - const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights; - XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version); - std::shared_ptr model; - visitor.on_attribute("net", model); - model->get_rt_info()["version"] = int64_t(version); - return model; - } - }, - decrypt, - decrypt_from_string, - origin_weights_path); + ModelDeserializer deserializer(model_buffer, get_core(), decrypt, decrypt_from_string, origin_weights_path); return deserialize_model(deserializer, config); } diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp index 8a45c184b129bc..ad12787c19739a 100644 --- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.cpp @@ -6,23 +6,32 @@ #include #include +#include #include #include #include #include +#include #include #include +#include #include "openvino/core/any.hpp" #include "openvino/core/except.hpp" +#include "openvino/core/extension.hpp" #include "openvino/core/memory_util.hpp" #include "openvino/core/model.hpp" +#include "openvino/core/op_extension.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/core/shape.hpp" #include "openvino/core/type/element_type.hpp" +#include "openvino/core/type.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/util/variable.hpp" +#include "openvino/opsets/opset.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/icore.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/tensor.hpp" #include "openvino/util/mmap_object.hpp" @@ -33,14 +42,19 @@ namespace ov::intel_cpu { ModelDeserializer::ModelDeserializer(std::shared_ptr& model_buffer, - ModelBuilder fn, + const std::shared_ptr& core, const CacheDecrypt& decrypt_fn, bool decript_from_string, - std::string origin_weights_path) + const std::string& origin_weights_path) : m_model(model_buffer), - m_model_builder(std::move(fn)), - m_decript_from_string(decript_from_string), - m_origin_weights_path(std::move(origin_weights_path)) { + m_core(core), + m_decript_from_string(decript_from_string) { + if (!origin_weights_path.empty() && std::filesystem::exists(origin_weights_path)) { + auto mmap = ov::load_mmap_object(origin_weights_path); + m_origin_weights_buf = + std::make_shared>>(mmap->data(), mmap->size(), mmap); + } + if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -49,14 +63,19 @@ ModelDeserializer::ModelDeserializer(std::shared_ptr& model_b } ModelDeserializer::ModelDeserializer(std::istream& model_stream, - ModelBuilder fn, + const std::shared_ptr& core, const CacheDecrypt& decrypt_fn, bool decript_from_string, - std::string origin_weights_path) + const std::string& origin_weights_path) : m_model(model_stream), - m_model_builder(std::move(fn)), - m_decript_from_string(decript_from_string), - m_origin_weights_path(std::move(origin_weights_path)) { + m_core(core), + m_decript_from_string(decript_from_string) { + if (!origin_weights_path.empty() && std::filesystem::exists(origin_weights_path)) { + auto mmap = ov::load_mmap_object(origin_weights_path); + m_origin_weights_buf = + std::make_shared>>(mmap->data(), mmap->size(), mmap); + } + if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -74,6 +93,53 @@ void ModelDeserializer::operator>>(std::shared_ptr& model) { m_model); } +std::shared_ptr ModelDeserializer::create_ov_model( + const std::shared_ptr& model_buf, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights) { + if (origin_weights == nullptr) { + return m_core->read_model(model_buf, weights); + } + + // Custom deserialization for weightless mode + + pugi::xml_document xml_doc; + const auto root = [&] { + auto res = + xml_doc.load_buffer(model_buf->get_ptr(), model_buf->size(), pugi::parse_default, pugi::encoding_utf8); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + return xml_doc.document_element(); + }(); + const auto opsets = [] { + std::unordered_map opsets; + for (const auto& [name, mk_opset] : ov::get_available_opsets()) { + opsets[name] = mk_opset(); + } + return opsets; + }(); + const auto version = static_cast(ov::util::pugixml::get_uint64_attr(root, "version", 0)); + + auto create_extensions_map = [&]() -> std::unordered_map { + std::unordered_map exts; + std::vector m_extensions; + OV_CREATE_EXTENSION(m_extensions); + for (const auto& ext : m_extensions) { + if (auto base_ext = std::dynamic_pointer_cast(ext)) { + exts.insert({base_ext->get_type_info(), base_ext}); + } + } + return exts; + }(); + + std::unordered_map> variables; + const auto& w = (weights != nullptr && weights->size() != 0) ? weights : origin_weights; + XmlDeserializer visitor(root, w, origin_weights, opsets, create_extensions_map, variables, version); + std::shared_ptr model; + visitor.on_attribute("net", model); + model->get_rt_info()["version"] = static_cast(version); + return model; +} + void ModelDeserializer::process_model(std::shared_ptr& model, const std::shared_ptr& model_buffer) { // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. @@ -111,13 +177,6 @@ void ModelDeserializer::process_model(std::shared_ptr& model, model_buffer); } - std::shared_ptr origin_weights_buf; - if (!m_origin_weights_path.empty()) { - auto mmap = ov::load_mmap_object(m_origin_weights_path); - origin_weights_buf = - std::make_shared>>(mmap->data(), mmap->size(), mmap); - } - // XML content auto xml_buff = std::make_shared(); if (m_cache_decrypt) { @@ -134,7 +193,7 @@ void ModelDeserializer::process_model(std::shared_ptr& model, std::shared_ptr model_buf = std::make_shared>>((*xml_buff).data(), hdr.model_size, xml_buff); - model = m_model_builder(model_buf, weights_buf, origin_weights_buf); + model = create_ov_model(model_buf, weights_buf, m_origin_weights_buf); // Set Info pugi::xml_node root = xml_in_out_doc.child("cnndata"); @@ -180,13 +239,6 @@ void ModelDeserializer::process_model(std::shared_ptr& model, model_stream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } - std::shared_ptr origin_weights_buf; - if (!m_origin_weights_path.empty()) { - auto mmap = ov::load_mmap_object(m_origin_weights_path); - origin_weights_buf = - std::make_shared>>(mmap->data(), mmap->size(), mmap); - } - // read XML content auto xml_string = std::make_shared(); model_stream.seekg(hdr.model_offset + hdr_pos); @@ -211,7 +263,7 @@ void ModelDeserializer::process_model(std::shared_ptr& model, hdr.consts_size, data_blob); - model = m_model_builder(model_buf, weights_buf, origin_weights_buf); + model = create_ov_model(model_buf, weights_buf, m_origin_weights_buf); // Set Info pugi::xml_node root = xmlInOutDoc.child("cnndata"); @@ -250,49 +302,49 @@ void XmlDeserializer::set_constant_num_buffer(ov::AttributeAdapter(); } - if (is_wlc_way) { - const auto& wlc_attribute = wlc.as(); - - auto actual_size = wlc_attribute.original_size; - auto offset = wlc_attribute.bin_offset; - auto w_size = m_origin_weights->size(); - OPENVINO_ASSERT(w_size >= offset + actual_size, "Incorrect weights in bin file!"); - - auto original_dtype = wlc_attribute.original_dtype; - char* data = m_origin_weights->get_ptr() + offset; - - ov::Shape shape; - OPENVINO_ASSERT(getParameters(dn, "shape", shape), - "[ CPU ] Could not get attribute 'shape' during weights deserialization."); - - if (original_dtype != target_dtype) { - const auto org_tensor = ov::Tensor(original_dtype, shape, data); - auto converted_weights = - std::make_shared(ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); - auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}}; - auto convert = op::v0::Convert(); - OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported"); - adapter.set(converted_weights); - } else { - if (actual_size < ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3)) { - const auto type = ov::util::pugixml::get_str_attr(get_node(), "type"); - OPENVINO_THROW("Attribute and shape size are inconsistent for ", - type, - " op!", - actual_size, - ", ", - ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3), - ", ", - ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); - } + if (!is_wlc_way) { + ov::util::XmlDeserializer::set_constant_num_buffer(adapter); + return; + } - auto buffer = std::make_shared>>(data, - actual_size, - m_origin_weights); - adapter.set(buffer); - } + const auto& wlc_attribute = wlc.as(); + + auto actual_size = wlc_attribute.original_size; + auto offset = wlc_attribute.bin_offset; + auto w_size = m_origin_weights->size(); + OPENVINO_ASSERT(w_size >= offset + actual_size, "Incorrect weights in bin file!"); + + auto original_dtype = wlc_attribute.original_dtype; + char* data = m_origin_weights->get_ptr() + offset; + + ov::Shape shape; + OPENVINO_ASSERT(getParameters(dn, "shape", shape), + "[ CPU ] Could not get attribute 'shape' during weights deserialization."); + + if (original_dtype != target_dtype) { + const auto org_tensor = ov::Tensor(original_dtype, shape, data); + auto converted_weights = + std::make_shared(ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); + auto converted_output = ov::TensorVector{{target_dtype, shape, converted_weights->get_ptr()}}; + auto convert = op::v0::Convert(); + OPENVINO_ASSERT(convert.evaluate(converted_output, {org_tensor}), "Conversion not supported"); + adapter.set(converted_weights); } else { - ov::util::XmlDeserializer::set_constant_num_buffer(adapter); + if (actual_size < ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3)) { + const auto type = ov::util::pugixml::get_str_attr(get_node(), "type"); + OPENVINO_THROW("Attribute and shape size are inconsistent for ", + type, + " op!", + actual_size, + ", ", + ((ov::shape_size(shape) * target_dtype.bitwidth() + 7) >> 3), + ", ", + ov::util::get_memory_size(target_dtype, ov::shape_size(shape))); + } + + auto buffer = + std::make_shared>>(data, actual_size, m_origin_weights); + adapter.set(buffer); } } diff --git a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp index 9bc9c7a445c366..67704c868ac23a 100644 --- a/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp +++ b/src/plugins/intel_cpu/src/utils/graph_serializer/deserializer.hpp @@ -15,6 +15,9 @@ #include "openvino/util/xml_parse_utils.hpp" #include "utils/codec_xor.hpp" +namespace ov { +class ICore; +} namespace ov::intel_cpu { template @@ -70,21 +73,17 @@ class XmlDeserializer : public ov::util::XmlDeserializer { class ModelDeserializer { public: - using ModelBuilder = std::function(const std::shared_ptr&, - const std::shared_ptr&, - const std::shared_ptr&)>; - ModelDeserializer(std::shared_ptr& model_buffer, - ModelBuilder fn, + const std::shared_ptr& core, const CacheDecrypt& decrypt_fn, bool decript_from_string, - std::string origin_weights_path = ""); + const std::string& origin_weights_path = ""); ModelDeserializer(std::istream& model_stream, - ModelBuilder fn, + const std::shared_ptr& core, const CacheDecrypt& decrypt_fn, bool decript_from_string, - std::string origin_weights_path = ""); + const std::string& origin_weights_path = ""); virtual ~ModelDeserializer() = default; @@ -94,13 +93,18 @@ class ModelDeserializer { static void set_info(pugi::xml_node& root, std::shared_ptr& model); void process_model(std::shared_ptr& model, const std::shared_ptr& model_buffer); + void process_model(std::shared_ptr& model, std::reference_wrapper model_stream); + std::shared_ptr create_ov_model(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::shared_ptr& origin_weights); + std::variant, std::reference_wrapper> m_model; - ModelBuilder m_model_builder; + std::shared_ptr m_core; CacheDecrypt m_cache_decrypt; bool m_decript_from_string; - std::string m_origin_weights_path; + std::shared_ptr m_origin_weights_buf; }; } // namespace ov::intel_cpu