Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions samples/cpp/benchmark_app/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,11 @@ int main(int argc, char* argv[]) {
if (is_virtual_device(device)) {
device_nstreams.erase(device);
}

if (!FLAGS_cache_dir.empty()) {
// Choose between better model compilation time and cache file size.
device_config[ov::cache_mode.name()] = ov::CacheMode::OPTIMIZE_SPEED;
}
}
auto result = std::find_if(config.begin(), config.end(), [&](const std::pair<std::string, ov::AnyMap>& item) {
return device_name.find(item.first) == 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/fp16_compression/mark_floatpoint_range.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"
#include "transformations/utils/utils.hpp"
Expand Down
4 changes: 1 addition & 3 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,7 @@ endif()
# some sources are located in openvino_core, while headers are in openvino_transformations
file(GLOB_RECURSE smart_reshape_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/smart_reshape/*.cpp)
file(GLOB_RECURSE rt_info_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/rt_info/*.cpp)
set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_precision.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/init_node_info.cpp"
set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/serialize.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/op/type_relaxed.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/preprocess/preprocess_steps_impl.cpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute {

bool is_copyable() const override;

bool visit_attributes(AttributeVisitor& visitor) override;

size_t original_size;
size_t bin_offset;
ov::element::Type original_dtype;
Expand Down
4 changes: 1 addition & 3 deletions src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ class OPENVINO_API XmlSerializer : public ov::AttributeVisitor {
virtual void append_rt_info(pugi::xml_node& node, ov::RTMap& attributes);
virtual bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute);
virtual bool append_node_attributes(ov::Node& node);
// Inline, const-qualified accessor: returns the constant write handler stored in
// m_constant_node_write_handler as a mutable reference.
virtual util::ConstantWriter& get_constant_write_handler() const {
return m_constant_node_write_handler;
}
virtual util::ConstantWriter& get_constant_write_handler();

public:
XmlSerializer(pugi::xml_node& data,
Expand Down
9 changes: 8 additions & 1 deletion src/core/src/op/util/weightless_caching_attributes.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2024 Intel Corporation
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

Expand All @@ -11,6 +11,13 @@ bool ov::WeightlessCacheAttribute::is_copyable() const {
return false;
}

// Presents this attribute's fields to `visitor` under the names "original_dtype",
// "bin_offset" and "original_size", in exactly that order.
// Always returns true.
bool ov::WeightlessCacheAttribute::visit_attributes(AttributeVisitor& visitor) {
visitor.on_attribute("original_dtype", original_dtype);
visitor.on_attribute("bin_offset", bin_offset);
visitor.on_attribute("original_size", original_size);
return true;
}

OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr<ov::Node>& from,
const std::shared_ptr<ov::Node>& to) {
const auto& rt_info = from->get_rt_info();
Expand Down
6 changes: 3 additions & 3 deletions src/core/src/xml_util/xml_serialize_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,9 +1061,9 @@ bool XmlSerializer::append_node_attributes(ov::Node& node) {
return node.visit_attributes(*this);
}

// util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
// return m_constant_node_write_handler.get();
// }
// Returns the ConstantWriter obtained from m_constant_node_write_handler.get().
util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
    util::ConstantWriter& writer = m_constant_node_write_handler.get();
    return writer;
}

std::string get_ir_precision_name(const element::Type& precision) {
switch (precision) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@
namespace ov::util {
struct GenericLayerParams;

/// @brief Splits a comma-separated string and appends every parsed field to a container.
///
/// Each field is extracted with `operator>>` into a `T::value_type` and inserted at
/// `res.end()`. Elements already present in `res` are kept.
///
/// @tparam T     Container type exposing `value_type`, `end()` and `insert(pos, value)`.
/// @param value  Comma-separated list to parse.
/// @param res    Container that receives the parsed values.
///
/// Raises an error through OPENVINO_THROW when a field between separators is empty.
template <class T>
void str_to_container(const std::string& value, T& res) {
    std::stringstream ss(value);
    std::string field;
    while (getline(ss, field, ',')) {
        if (field.empty())
            OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
        std::stringstream fs(field);
        // Value-initialize: when extraction fails before anything is stored (e.g. a
        // whitespace-only field), `val` must not be read while indeterminate.
        typename T::value_type val{};
        fs >> val;
        res.insert(res.end(), val);
    }
}

class XmlDeserializer : public ov::AttributeVisitor {
public:
explicit XmlDeserializer(const pugi::xml_node& node,
Expand Down Expand Up @@ -53,6 +67,9 @@ class XmlDeserializer : public ov::AttributeVisitor {
virtual void set_constant_num_buffer(ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>& adapter);

const pugi::xml_node& get_node() const;
// Returns m_weights by const reference; the shared_ptr itself is not copied.
const std::shared_ptr<ov::AlignedBuffer>& get_weights() const {
return m_weights;
}

private:
struct IoMap {
Expand Down
16 changes: 1 addition & 15 deletions src/core/xml_util/src/xml_deserialize_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,12 @@ bool getStrAttribute(const pugi::xml_node& node, const std::string& name, std::s
return true;
}

// Splits the comma-separated string `value` and appends every parsed field to `res`.
// Each field is extracted with operator>> into a T::value_type and inserted at res.end();
// elements already present in `res` are kept.
// Raises an error through OPENVINO_THROW when a field between separators is empty.
template <class T>
void str_to_container(const std::string& value, T& res) {
    std::stringstream ss(value);
    std::string field;
    while (getline(ss, field, ',')) {
        if (field.empty())
            OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
        std::stringstream fs(field);
        // Value-initialize: when extraction fails before anything is stored (e.g. a
        // whitespace-only field), `val` must not be read while indeterminate.
        typename T::value_type val{};
        fs >> val;
        res.insert(res.end(), val);
    }
}

// Reads attribute `name` of `node` as a string and parses it as a comma-separated
// list, appending the parsed elements to `value`.
// Returns false, leaving `value` untouched, when getStrAttribute returns false;
// returns true otherwise.
template <class T>
bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector<T>& value) {
    std::string param;
    if (!getStrAttribute(node, name, param))
        return false;
    // Parse exactly once: a second parsing call would append every element again.
    ov::util::str_to_container(param, value);
    return true;
}

Expand Down
22 changes: 15 additions & 7 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/util/common_util.hpp"
#include "openvino/util/file_util.hpp"
#include "openvino/util/log.hpp"
#include "openvino/util/shared_object.hpp"
#include "openvino/util/variant_visitor.hpp"
#include "openvino/util/xml_parse_utils.hpp"
Expand Down Expand Up @@ -850,6 +851,16 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
const auto compiled_config = create_compile_config(plugin, parsed._config);
cache_content.blobId = ModelCache::compute_hash(model, cache_content.modelPath, compiled_config);
cache_content.model = model;

const auto& cache_mode_it = config.find(cache_mode.name());
if (cache_mode_it != config.end() && cache_mode_it->second == CacheMode::OPTIMIZE_SIZE) {
const auto& rt_info = model->get_rt_info();
auto weights_path = rt_info.find("__weights_path");
if (weights_path != rt_info.end()) {
parsed._config[ov::weights_path.name()] = weights_path->second;
}
}

const auto lock = cacheGuard.get_hash_lock(cache_content.blobId);
res = load_model_from_cache(cache_content, plugin, parsed._config, {}, [&]() {
return compile_model_and_cache(plugin, model, parsed._config, {}, cache_content);
Expand Down Expand Up @@ -1585,10 +1596,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
update_config[ov::hint::model.name()] = cacheContent.model;
}

if (util::contains(plugin.get_property(ov::supported_properties), ov::hint::model) &&
cacheContent.model) {
update_config[ov::hint::model.name()] = cacheContent.model;
}
if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
util::Path weights_path;

Expand All @@ -1597,7 +1604,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
weights_path = path_hint->second.as<std::string>();
} else if (weights_path = extract_weight_path(header.get_runtime_info()); weights_path.empty()) {
weights_path = cacheContent.modelPath;
weights_path.replace_extension(".bin");
}
weights_path.replace_extension(".bin");

Expand Down Expand Up @@ -1629,9 +1635,11 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
// throw;
}

// fallback scenario
if (!compiled_model)
// Fallback scenario
if (!compiled_model) {
OPENVINO_WARN("Could not load model from cache.");
compiled_model = compile_model_lambda();
}

return compiled_model;
}
Expand Down
4 changes: 3 additions & 1 deletion src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ ov_mark_target_as_cc(${TARGET_NAME})

target_link_libraries(${TARGET_NAME} PRIVATE dnnl
openvino::shape_inference
openvino::snippets)
openvino::snippets
openvino_xml_util)

target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
if (ENABLE_MLAS_FOR_CPU)
Expand Down Expand Up @@ -397,6 +398,7 @@ if(BUILD_SHARED_LIBS)
$<TARGET_PROPERTY:openvino::shape_inference,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::snippets,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::reference,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::xml_util,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/src
$<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>)
Expand Down
10 changes: 6 additions & 4 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
#include "sub_memory_manager.hpp"
#include "utils/debug_capabilities.h"
#include "utils/general_utils.h"
#include "utils/graph_serializer/serializer.hpp"
#include "utils/memory_stats_dump.hpp"
#include "utils/serialize.hpp"

#if defined(OV_CPU_WITH_ACL)
# include <arm_compute/runtime/IScheduler.h>
Expand Down Expand Up @@ -303,8 +303,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
RO_property(ov::key_cache_precision.name()),
RO_property(ov::value_cache_precision.name()),
RO_property(ov::key_cache_group_size.name()),
RO_property(ov::value_cache_group_size.name()),
};
RO_property(ov::value_cache_group_size.name())};

return ro_properties;
}
Expand Down Expand Up @@ -400,11 +399,14 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
if (name == ov::value_cache_group_size) {
return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
}
if (name == ov::weights_path) {
return static_cast<decltype(ov::weights_path)::value_type>("");
}
OPENVINO_THROW("Unsupported property: ", name);
}

void CompiledModel::export_model(std::ostream& modelStream) const {
ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt, m_cfg.m_cache_mode == ov::CacheMode::OPTIMIZE_SIZE);
serializer << m_model;
}

Expand Down
9 changes: 8 additions & 1 deletion src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,14 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
} catch (ov::Exception&) {
OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name());
}
} else if (key == ov::internal::caching_with_mmap.name()) {
} else if (key == ov::cache_mode.name()) {
try {
m_cache_mode = val.as<ov::CacheMode>();
} catch (...) {
OPENVINO_THROW("Wrong value for property key ", ov::cache_mode.name());
}
} else if (key == ov::hint::model.name() || key == ov::internal::caching_with_mmap.name() ||
key == ov::weights_path.name()) {
} else if (key == ov::intel_cpu::enable_sage_attn.name()) {
try {
enableSageAttn = val.as<bool>();
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ struct Config {
std::function<std::string(const std::string&)> cacheEncrypt;
std::function<std::string(const std::string&)> cacheDecrypt;

ov::CacheMode m_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;

#ifdef CPU_DEBUG_CAPS
DebugCapsConfig debugCaps;
void applyDebugCapsProperties();
Expand Down
Loading
Loading