From 30e9d5fae674086474e33f329db2afe3ac6d4078 Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 19 Sep 2025 13:25:52 -0700 Subject: [PATCH 01/13] early version, it doesn't embed initializers into the proto, but then restores the metadata so OV can read them back Signed-off-by: bfilipek --- .../providers/openvino/backend_manager.cc | 168 +++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 99f28439db53a..72792fafa24ec 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -21,6 +21,7 @@ #include "core/providers/openvino/ov_versions/capability.h" #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" +#include "../../framework/tensorprotoutils.h" namespace onnxruntime { namespace openvino_ep { @@ -453,6 +454,46 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on #endif } +static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; + auto* external_data = proto_init->mutable_external_data(); + bool found_location = false, found_offset = false, found_length = false; + const int ext_data_size = external_data->size(); + proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); + + for (int j = 0; j < ext_data_size; ++j) { + auto& ext_entry = external_data->at(j); + auto& key = *ext_entry.mutable_key(); + if (key == "location") { + *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + found_location = true; + } else if (key == "offset") { + *ext_entry.mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + found_offset = true; + } 
else if (key == "length") { + *ext_entry.mutable_value() = std::to_string(data_size); + found_length = true; + } + } + + if (!found_location) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "location"; + *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + } + if (!found_offset) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "offset"; + *new_entry->mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + } + if (!found_length) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "length"; + *new_entry->mutable_value() = std::to_string(data_size); + } +} + + std::unique_ptr BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, @@ -529,12 +570,137 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; + + static bool load_user_initializer_ = true; + size_t userWeightsFromRawData = 0; + size_t userWeightsFromExternalDataInMemory = 0; + size_t allInitializersCount = 0; + if (load_user_initializer_) { + auto allInitializers = subgraph.GetAllInitializedTensors(); + allInitializersCount = allInitializers.size(); + + for (auto& entry : allInitializers) { + auto* tp = entry.second; + if (tp->has_raw_data()) { + userWeightsFromRawData++; + } else if (utils::HasExternalDataInMemory(*tp)) { + userWeightsFromExternalDataInMemory++; + } + } + } + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Loaded " << allInitializersCount << " initializers from the model. 
" + << userWeightsFromRawData << " from raw_data, " + << userWeightsFromExternalDataInMemory << " from external_data."; + auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - subgraph.ToProto(*model_proto->mutable_graph(), true, true); + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, /*include_outer_scope_args*/true, /*execution order*/0, /*include_initializer_data*/!load_user_initializer_); + print_model_proto_duration(); DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); + + // new code: + if (load_user_initializer_) + { + LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..."; + const auto& allInitializers = subgraph.GetAllInitializedTensors(); + auto* graph_proto = model_proto->mutable_graph(); + auto* proto_initializers = graph_proto->mutable_initializer(); + + // Build a map for quick lookup by name + std::unordered_map proto_initializer_map; + for (int i = 0, n = proto_initializers->size(); i < n; ++i) { + auto& proto_init = proto_initializers->at(i); + proto_initializer_map[proto_init.name()] = &proto_init; + } + + for (const auto& init_entry : allInitializers) { + const std::string& name = init_entry.first; + const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second; + + auto it = proto_initializer_map.find(name); + if (it == proto_initializer_map.end()) + continue; + + auto* proto_init = it->second; + + // If the proto initializer is missing data, fill it in + if (!proto_init->has_raw_data() && src_init->has_raw_data()) { + *proto_init->mutable_raw_data() = src_init->raw_data(); + } + + // Only set in-memory external_data fields if the data is in memory + if (src_init->has_raw_data()) { + // Debug info for in-memory initializers + LOGS(logger, VERBOSE) << "In-memory initializer RAW: " + << src_init->name() + << ", data_type: " << src_init->data_type() + << ", 
raw_data size: " << src_init->raw_data().size(); + + SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); + } + else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { + + using mutable_proto_t = ONNX_NAMESPACE::TensorProto*; + auto& mutable_proto = *const_cast(src_init); + auto* entry_protos = mutable_proto.mutable_external_data(); + std::string location; + size_t offset = 0; + size_t length = 0; + for (int i = 0; i < entry_protos->size(); i++) { + auto& string_entry_proto{ entry_protos->at(i) }; + const auto& pb_key{ *(string_entry_proto.mutable_key()) }; + const auto& pb_value{ *(string_entry_proto.mutable_value()) }; + if (pb_key == "location") { + location = pb_value; + } + else if (pb_key == "offset") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); + if (res.ec != std::errc()) { + LOGS(logger, ERROR) << "External data in memory has invalid offset field: " + << src_init->name() << "], location: " << location + << ", offset: " << pb_value; + offset = 0; + } + } + else if (pb_key == "length") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); + if (res.ec != std::errc()) { + LOGS(logger, ERROR) << "External data in memory has invalid length field: " + << src_init->name() << "], location: " << location + << ", length: " << pb_value; + offset = 0; + } + } + } + if (offset == 0 || length == 0) { + LOGS(logger, ERROR) << "External data in memory has invalid external_data fields: " + << src_init->name() << "], location: " << location + << ", offset: " << offset + << ", length: " << length; + } + else + { + // we have data in it, so populate the proto_init + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " + << src_init->name() + << ", size: " << length; + + SetExternalDataFields(proto_init, (const void*)offset, length); + } + } + else { + // Debug info for file-based initializers + LOGS(logger, VERBOSE)<< 
"File-based initializer: " + << src_init->name() + << ", data_type: " << src_init->data_type(); + } + + } + + } + return model_proto; } } From 70c2f3715f803b8d3be45367c77a32188debe90f Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 26 Sep 2025 12:22:28 -0700 Subject: [PATCH 02/13] improve code, refactor into smaller functions, run the logic when there are external initializers in memory (more than one) Signed-off-by: bfilipek --- .../providers/openvino/backend_manager.cc | 196 ++++++++---------- 1 file changed, 91 insertions(+), 105 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 72792fafa24ec..9d448f53c6d5b 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -455,44 +455,76 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on } static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { - static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; - auto* external_data = proto_init->mutable_external_data(); - bool found_location = false, found_offset = false, found_length = false; - const int ext_data_size = external_data->size(); - proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); - - for (int j = 0; j < ext_data_size; ++j) { - auto& ext_entry = external_data->at(j); - auto& key = *ext_entry.mutable_key(); - if (key == "location") { - *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; - found_location = true; - } else if (key == "offset") { - *ext_entry.mutable_value() = std::to_string(reinterpret_cast(data_ptr)); - found_offset = true; - } else if (key == "length") { - *ext_entry.mutable_value() = std::to_string(data_size); - found_length = true; - } + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = 
"*/_ORT_MEM_ADDR_/*"; + auto* external_data = proto_init->mutable_external_data(); + bool found_location = false, found_offset = false, found_length = false; + const int ext_data_size = external_data->size(); + proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); + + for (int j = 0; j < ext_data_size; ++j) { + auto& ext_entry = external_data->at(j); + auto& key = *ext_entry.mutable_key(); + if (key == "location") { + *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + found_location = true; + } else if (key == "offset") { + *ext_entry.mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + found_offset = true; + } else if (key == "length") { + *ext_entry.mutable_value() = std::to_string(data_size); + found_length = true; } + } - if (!found_location) { - auto* new_entry = external_data->Add(); - *new_entry->mutable_key() = "location"; - *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; - } - if (!found_offset) { - auto* new_entry = external_data->Add(); - *new_entry->mutable_key() = "offset"; - *new_entry->mutable_value() = std::to_string(reinterpret_cast(data_ptr)); - } - if (!found_length) { - auto* new_entry = external_data->Add(); - *new_entry->mutable_key() = "length"; - *new_entry->mutable_value() = std::to_string(data_size); - } + if (!found_location) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "location"; + *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + } + if (!found_offset) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "offset"; + *new_entry->mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + } + if (!found_length) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "length"; + *new_entry->mutable_value() = std::to_string(data_size); + } } +static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& 
length) { + // Remove constness as we need to use mutable_external_data() to get the entries to read. + // The entries themselves are not modified... + auto& mutable_proto = *const_cast(src_init); + auto* entry_protos = mutable_proto.mutable_external_data(); + for (int i = 0; i < entry_protos->size(); i++) { + auto& string_entry_proto{entry_protos->at(i)}; + const auto& pb_key{*(string_entry_proto.mutable_key())}; + const auto& pb_value{*(string_entry_proto.mutable_value())}; + if (pb_key == "location") { + location = pb_value; + } else if (pb_key == "offset") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid offset field: " + << src_init->name() << "], location: " << location + << ", offset: " << pb_value; + ORT_THROW(err_msg.str()); + } + } else if (pb_key == "length") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid length field: " + << src_init->name() << "], location: " << location + << ", length: " << pb_value; + ORT_THROW(err_msg.str()); + } + } + } +} std::unique_ptr BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, @@ -571,37 +603,33 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; - static bool load_user_initializer_ = true; - size_t userWeightsFromRawData = 0; - size_t userWeightsFromExternalDataInMemory = 0; - size_t allInitializersCount = 0; - if (load_user_initializer_) { - auto allInitializers = subgraph.GetAllInitializedTensors(); - allInitializersCount = allInitializers.size(); - - for (auto& entry : allInitializers) { - auto* tp = entry.second; - if (tp->has_raw_data()) { - 
userWeightsFromRawData++; - } else if (utils::HasExternalDataInMemory(*tp)) { - userWeightsFromExternalDataInMemory++; + const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable { + auto allInitializers = subgraph.GetAllInitializedTensors(); + for (auto& [name, tp] : allInitializers) { + if (utils::HasExternalDataInMemory(*tp)) { + ++cnt; } } - } - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Loaded " << allInitializersCount << " initializers from the model. " - << userWeightsFromRawData << " from raw_data, " - << userWeightsFromExternalDataInMemory << " from external_data."; + return cnt; + }(); + + // when we have external weights in memory, the model proto will actually embed those + // and bloat the serialized string. We can avoid that by not including the data in the proto + // but then we have to update those initializers and set the external_data fields to mem_addr tag... + // 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1); auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, /*include_outer_scope_args*/true, /*execution order*/0, /*include_initializer_data*/!load_user_initializer_); + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, + /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); print_model_proto_duration(); DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); // new code: - if (load_user_initializer_) + if (!include_initializer_data_in_proto) { LOGS(logger, INFO) << "Initializer data is not included in the model proto. 
Updating metadata..."; const auto& allInitializers = subgraph.GetAllInitializedTensors(); @@ -632,7 +660,6 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, // Only set in-memory external_data fields if the data is in memory if (src_init->has_raw_data()) { - // Debug info for in-memory initializers LOGS(logger, VERBOSE) << "In-memory initializer RAW: " << src_init->name() << ", data_type: " << src_init->data_type() @@ -640,55 +667,17 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); } - else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { - - using mutable_proto_t = ONNX_NAMESPACE::TensorProto*; - auto& mutable_proto = *const_cast(src_init); - auto* entry_protos = mutable_proto.mutable_external_data(); + else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { std::string location; size_t offset = 0; size_t length = 0; - for (int i = 0; i < entry_protos->size(); i++) { - auto& string_entry_proto{ entry_protos->at(i) }; - const auto& pb_key{ *(string_entry_proto.mutable_key()) }; - const auto& pb_value{ *(string_entry_proto.mutable_value()) }; - if (pb_key == "location") { - location = pb_value; - } - else if (pb_key == "offset") { - const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); - if (res.ec != std::errc()) { - LOGS(logger, ERROR) << "External data in memory has invalid offset field: " - << src_init->name() << "], location: " << location - << ", offset: " << pb_value; - offset = 0; - } - } - else if (pb_key == "length") { - const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); - if (res.ec != std::errc()) { - LOGS(logger, ERROR) << "External data in memory has invalid length field: " - << src_init->name() << "], location: " << location - << ", length: " << pb_value; - offset = 0; - } - } - } - if 
(offset == 0 || length == 0) { - LOGS(logger, ERROR) << "External data in memory has invalid external_data fields: " - << src_init->name() << "], location: " << location - << ", offset: " << offset - << ", length: " << length; - } - else - { - // we have data in it, so populate the proto_init - LOGS(logger, VERBOSE) << "In-memory initializer EXT: " - << src_init->name() - << ", size: " << length; - - SetExternalDataFields(proto_init, (const void*)offset, length); - } + ReadExternalDataFields(src_init, location, offset, length); + + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " + << src_init->name() + << ", size: " << length; + + SetExternalDataFields(proto_init, (const void*)offset, length); } else { // Debug info for file-based initializers @@ -838,10 +827,7 @@ void BackendManager::Compute(OrtKernelContext* context) { { std::unique_lock lock(mutex_); - auto it = backend_map_.find(key); - if (it != backend_map_.end()) { - dynamic_backend = it->second; - } + dynamic_backend = backend_map_[key]; } if (!dynamic_backend) { From 80de8ef02c18f5efff80bb8168ce9d2ad61b526b Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 26 Sep 2025 12:36:41 -0700 Subject: [PATCH 03/13] revert the wrongly merged code Signed-off-by: bfilipek --- onnxruntime/core/providers/openvino/backend_manager.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 9d448f53c6d5b..91ed230ea528f 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -827,7 +827,10 @@ void BackendManager::Compute(OrtKernelContext* context) { { std::unique_lock lock(mutex_); - dynamic_backend = backend_map_[key]; + auto it = backend_map_.find(key); + if (it != backend_map_.end()) { + dynamic_backend = it->second; + } } if (!dynamic_backend) { From ef6f23d4323cb8f6d53b8c3628a138583de33c4d Mon Sep 17 00:00:00 
2001 From: bfilipek Date: Tue, 30 Sep 2025 05:22:12 -0700 Subject: [PATCH 04/13] Updated the condition for the new logic based on the total size of ext initializers, comments, refactoring Signed-off-by: bfilipek --- .../providers/openvino/backend_manager.cc | 130 +++++++++--------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 91ed230ea528f..5acf127cc45c2 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -454,8 +454,10 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on #endif } +// this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto +// but we cannot use that function as it is not part of public provider api. static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { - static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; auto* external_data = proto_init->mutable_external_data(); bool found_location = false, found_offset = false, found_length = false; const int ext_data_size = external_data->size(); @@ -494,7 +496,7 @@ static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const } static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { - // Remove constness as we need to use mutable_external_data() to get the entries to read. + // Remove constness as we need to use mutable_external_data() to get the entries to read. // The entries themselves are not modified... 
auto& mutable_proto = *const_cast(src_init); auto* entry_protos = mutable_proto.mutable_external_data(); @@ -603,21 +605,31 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; - const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable { + // scan ext initializers: + std::unordered_map> external_initializers_offset_and_length; + std::string tempLocation; + size_t extInitializerTotalSize = 0; + if (session_context_.has_external_weights) { auto allInitializers = subgraph.GetAllInitializedTensors(); for (auto& [name, tp] : allInitializers) { if (utils::HasExternalDataInMemory(*tp)) { - ++cnt; + size_t offset = 0; + size_t length = 0; + ReadExternalDataFields(tp, tempLocation, offset, length); + extInitializerTotalSize += length; + external_initializers_offset_and_length[name] = {offset, length}; } - } - return cnt; - }(); + } + } // when we have external weights in memory, the model proto will actually embed those // and bloat the serialized string. We can avoid that by not including the data in the proto // but then we have to update those initializers and set the external_data fields to mem_addr tag... - // 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort - const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1); + // proto is limited to 2GB, but let's use 512MB as threshold to be conservative and still gain some memory reductions. 
+ constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 512; + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && + external_initializers_offset_and_length.size() > 1 && + extInitializerTotalSize > MAX_EMBEDDED_INITIALIZER_SIZE); auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); @@ -628,66 +640,54 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, print_model_proto_duration(); DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); - // new code: - if (!include_initializer_data_in_proto) - { - LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..."; - const auto& allInitializers = subgraph.GetAllInitializedTensors(); - auto* graph_proto = model_proto->mutable_graph(); - auto* proto_initializers = graph_proto->mutable_initializer(); - - // Build a map for quick lookup by name - std::unordered_map proto_initializer_map; - for (int i = 0, n = proto_initializers->size(); i < n; ++i) { - auto& proto_init = proto_initializers->at(i); - proto_initializer_map[proto_init.name()] = &proto_init; - } + if (!include_initializer_data_in_proto) { + LOGS(logger, INFO) << "Initializer data is not included in the model proto. 
Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers"; + auto* graph_proto = model_proto->mutable_graph(); + auto* proto_initializers = graph_proto->mutable_initializer(); + + std::unordered_map proto_initializer_map; + for (int i = 0, n = proto_initializers->size(); i < n; ++i) { + auto& proto_init = proto_initializers->at(i); + proto_initializer_map[proto_init.name()] = &proto_init; + } + + for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) { + auto it = proto_initializer_map.find(name); + if (it == proto_initializer_map.end()) + continue; - for (const auto& init_entry : allInitializers) { - const std::string& name = init_entry.first; - const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second; - - auto it = proto_initializer_map.find(name); - if (it == proto_initializer_map.end()) - continue; - - auto* proto_init = it->second; - - // If the proto initializer is missing data, fill it in - if (!proto_init->has_raw_data() && src_init->has_raw_data()) { - *proto_init->mutable_raw_data() = src_init->raw_data(); - } - - // Only set in-memory external_data fields if the data is in memory - if (src_init->has_raw_data()) { - LOGS(logger, VERBOSE) << "In-memory initializer RAW: " - << src_init->name() - << ", data_type: " << src_init->data_type() - << ", raw_data size: " << src_init->raw_data().size(); - - SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); - } - else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { - std::string location; - size_t offset = 0; - size_t length = 0; - ReadExternalDataFields(src_init, location, offset, length); - - LOGS(logger, VERBOSE) << "In-memory initializer EXT: " - << src_init->name() - << ", size: " << length; - - SetExternalDataFields(proto_init, (const void*)offset, length); - } - else { - // Debug info for file-based initializers - LOGS(logger, 
VERBOSE)<< "File-based initializer: " - << src_init->name() - << ", data_type: " << src_init->data_type(); - } + auto* proto_init = it->second; + // If the proto initializer is missing data, fill it in + if (!proto_init->has_raw_data() && src_init->has_raw_data()) { + *proto_init->mutable_raw_data() = src_init->raw_data(); } + // Only set in-memory external_data fields if the data is in memory + if (src_init->has_raw_data()) { + LOGS(logger, VERBOSE) << "In-memory initializer RAW: " + << src_init->name() + << ", data_type: " << src_init->data_type() + << ", raw_data size: " << src_init->raw_data().size(); + + SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); + } else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { + auto it_ext = external_initializers_offset_and_length.find(name); + if (it_ext == external_initializers_offset_and_length.end()) { + std::ostringstream err_msg; + err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name(); + ORT_THROW(err_msg.str()); + } + const size_t offset = it_ext->second.first; + const size_t length = it_ext->second.second; + + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length; + + SetExternalDataFields(proto_init, (const void*)offset, length); + } else { + LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type(); + } + } } return model_proto; From 165a661a5314d13d3030c92a208cfed4b2ca6072 Mon Sep 17 00:00:00 2001 From: MayureshV1 <47039074+MayureshV1@users.noreply.github.com> Date: Wed, 1 Oct 2025 17:33:20 -0700 Subject: [PATCH 05/13] Update onnxruntime/core/providers/openvino/backend_manager.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- onnxruntime/core/providers/openvino/backend_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 5acf127cc45c2..8aec80ae22c54 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -457,7 +457,7 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on // this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto // but we cannot use that function as it is not part of public provider api. static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { - static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; auto* external_data = proto_init->mutable_external_data(); bool found_location = false, found_offset = false, found_length = false; const int ext_data_size = external_data->size(); From fe2cf8c97680091a8d5e6c79fb03d0d58d7ee47f Mon Sep 17 00:00:00 2001 From: bfilipek Date: Thu, 2 Oct 2025 14:00:49 -0700 Subject: [PATCH 06/13] make the condition less strict - 32MB threshold, move debug dump after the logic is executed, check for OV version Signed-off-by: bfilipek --- .../providers/openvino/backend_manager.cc | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 8aec80ae22c54..989d1022f1d7b 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -619,27 +619,31 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, extInitializerTotalSize += length; external_initializers_offset_and_length[name] = {offset, length}; } - } + } } // when we have external weights in memory, the model 
proto will actually embed those // and bloat the serialized string. We can avoid that by not including the data in the proto // but then we have to update those initializers and set the external_data fields to mem_addr tag... - // proto is limited to 2GB, but let's use 512MB as threshold to be conservative and still gain some memory reductions. - constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 512; - const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && + // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions. +#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025)) + constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32; + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && external_initializers_offset_and_length.size() > 1 && - extInitializerTotalSize > MAX_EMBEDDED_INITIALIZER_SIZE); + extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE); +#else + const bool include_initializer_data_in_proto = true; +#endif + auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); - + print_model_proto_duration(); - DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); - + if (!include_initializer_data_in_proto) { LOGS(logger, INFO) << "Initializer data is not included in the model proto. 
Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers"; auto* graph_proto = model_proto->mutable_graph(); @@ -690,6 +694,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, } } + DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); + return model_proto; } } From e6727b125192d2519282092dd1c8d490ca8eed5c Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 3 Oct 2025 06:33:34 -0700 Subject: [PATCH 07/13] unit test that uses ext initializers, early version Signed-off-by: bfilipek --- .../openvino/openvino_ep_ext_init.cc | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc new file mode 100644 index 0000000000000..bdcfb4678f034 --- /dev/null +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -0,0 +1,222 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include +#include +#include + +#include "core/session/onnxruntime_cxx_api.h" + +#include "test/util/include/test/test_environment.h" +#include "test/unittest_util/qdq_test_utils.h" + +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::logging; + +extern std::unique_ptr ort_env; + +class OVEP_ExtInit_Tests : public ::testing::TestWithParam {}; + +namespace { + +std::vector LoadFileToMemory(const std::string& path) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return std::vector(); + } + std::streamsize size = file.tellg(); + file.seekg(0, std::ios::beg); + std::vector buffer(static_cast(size)); + if (!file.read(reinterpret_cast(buffer.data()), size)) { + return std::vector(); + } + return buffer; +} + +std::wstring StringToWstring(const std::string& str) { + return std::wstring(str.begin(), str.end()); +} + +auto ProbeDevice(const std::string& device) { + static std::map is_present; + if (is_present.find(device) == is_present.end()) { + Ort::SessionOptions sessionOptions; + std::unordered_map ov_options; + ov_options["device_type"] = device; + try { + sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options); + is_present[device] = true; + } catch (...) { + is_present[device] = false; + } + } + return is_present[device]; +} +} // namespace detail + +namespace onnxruntime { +namespace test { + +TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { + Ort::SessionOptions sessionOptions; + std::unordered_map ov_options; + const auto& device = GetParam(); + if (!ProbeDevice(device)) + GTEST_SKIP() << device + " is not available on this machine"; + + // wait 7 second for debugger + std::cout << "Waiting 7 seconds for debugger to attach if needed..." 
<< std::endl; + std::this_thread::sleep_for(std::chrono::seconds(7)); + + // Model and weights file paths + const std::string model_path = "ovep_ext_init_test.onnx"; + const std::string weights_path = "ovep_ext_init_test.onnx.data"; + const size_t num_initializers = 4; + const size_t floats_per_initializer = 2 * 1024 * 1024; // 2 millions floats per initializer, 8MB bytes + const size_t total_floats = num_initializers * floats_per_initializer; + const size_t total_bytes = total_floats * sizeof(float); + // min size threshold for new logic with ext initializers + ASSERT_GE(total_bytes, 32 * 1024 * 1024); + + // 1. Create initializers + std::vector> initializer_data; + for (size_t i = 0; i < num_initializers; ++i) + initializer_data.emplace_back(floats_per_initializer, static_cast(i + 1)); // W0:1, W1:2... + + // 2. Build ONNX model with 4 external initializers, and 4 ADD nodes + { + ModelProto model_proto; + model_proto.set_ir_version(7); + model_proto.set_producer_name("openvino_extinit_test"); + model_proto.set_producer_version("1.0"); + model_proto.set_domain(""); + model_proto.set_model_version(1); + + auto* graph = model_proto.mutable_graph(); + graph->set_name("TestGraph"); + + // Input: shape [floats_per_initializer] + auto* input = graph->add_input(); + input->set_name("X"); + auto* input_type = input->mutable_type()->mutable_tensor_type(); + input_type->set_elem_type(TensorProto_DataType_FLOAT); + input_type->mutable_shape()->add_dim()->set_dim_value(floats_per_initializer); + + // Output: shape [floats_per_initializer] + auto* output = graph->add_output(); + output->set_name("Y"); + auto* output_type = output->mutable_type()->mutable_tensor_type(); + output_type->set_elem_type(TensorProto_DataType_FLOAT); + output_type->mutable_shape()->add_dim()->set_dim_value(floats_per_initializer); + + auto* opset_import = model_proto.add_opset_import(); + opset_import->set_domain(""); + opset_import->set_version(19); + + // Add initializers as external data + size_t 
offset = 0; + std::vector initializer_names; + for (size_t i = 0; i < num_initializers; ++i) { + std::string name = "W" + std::to_string(i); + initializer_names.push_back(name); + TensorProto* initializer = graph->add_initializer(); + initializer->set_name(name); + initializer->set_data_type(TensorProto_DataType_FLOAT); + initializer->add_dims(floats_per_initializer); + initializer->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* ext = initializer->add_external_data(); + ext->set_key("location"); + ext->set_value(weights_path); + ext = initializer->add_external_data(); + ext->set_key("offset"); + ext->set_value(std::to_string(offset)); + ext = initializer->add_external_data(); + ext->set_key("length"); + ext->set_value(std::to_string(floats_per_initializer * sizeof(float))); + offset += floats_per_initializer * sizeof(float); + } + + // nodes: X -> Add with Init[0] -> ... -> output Y + std::string prev_output = "X"; + std::string node_output; + for (size_t i = 0; i < num_initializers; ++i) { + node_output = (i == num_initializers - 1) ? "Y" : "A" + std::to_string(i); + auto* add_node = graph->add_node(); + add_node->set_op_type("Add"); + add_node->add_input(prev_output); + add_node->add_input(initializer_names[i]); + add_node->add_output(node_output); + prev_output = node_output; + } + + // Save model + std::ofstream model_file(model_path, std::ios::binary); + ASSERT_TRUE(model_proto.SerializeToOstream(&model_file)); + model_file.close(); + } + + // 3. Save weights file (concatenate all initializers) + { + std::ofstream weights_file(weights_path, std::ios::binary); + ASSERT_TRUE(weights_file.is_open()); + for (const auto& w : initializer_data) { + weights_file.write(reinterpret_cast(w.data()), w.size() * sizeof(float)); + } + weights_file.close(); + } + + // 4. Load model and weights into memory + std::vector model_data = LoadFileToMemory(model_path); + std::vector weights_data = LoadFileToMemory(weights_path); + + // 5. 
Prepare external initializer info + std::wstring weights_name_w = StringToWstring(std::filesystem::path(weights_path).filename().string()); + std::vector names_w = {weights_name_w}; + std::vector buffers = {reinterpret_cast(weights_data.data())}; + std::vector buffer_sizes = {weights_data.size()}; + + // 6. Set up session options with OpenVINO + Ort::SessionOptions session_options; + // session_options.SetIntraOpNumThreads(1); + session_options.SetLogSeverityLevel(0); // verbose... + session_options.AppendExecutionProvider("OpenVINO", { {"device_type", device.c_str()} }); + session_options.AddExternalInitializersFromFilesInMemory(names_w, buffers, buffer_sizes); + + // 7. Create session from memory + Ort::Session session(*ort_env, model_data.data(), model_data.size(), session_options); + + // 8. Run inference to verify weights are loaded + std::vector input_data(floats_per_initializer, 2.0f); // shape [4194304] + std::vector input_shape = {static_cast(floats_per_initializer)}; + Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtDeviceAllocator, OrtMemTypeDefault); + Ort::Value input_tensor = Ort::Value::CreateTensor(mem_info, input_data.data(), input_data.size(), input_shape.data(), input_shape.size()); + + std::vector input_names = {"X"}; + std::vector output_names = {"Y"}; + std::vector output_tensors(1); + + session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), output_tensors.data(), 1); + + // Check output: should be input + W0 + W1 + W2... 
+ auto* out_data = output_tensors[0].GetTensorMutableData(); + float expected = input_data[0]; + for (size_t i = 0; i < num_initializers; ++i) { + expected += initializer_data[i][0]; + } + // Check first 10 elements + for (size_t i = 0; i < std::min(10, floats_per_initializer); ++i) + ASSERT_FLOAT_EQ(out_data[i], expected); + + // Cleanup + std::filesystem::remove(model_path); + std::filesystem::remove(weights_path); +} +INSTANTIATE_TEST_SUITE_P(OVEP_Tests, + OVEP_ExtInit_Tests, + ::testing::Values(/*"CPU",*/ "GPU"/*"NPU"*/)); + +} // namespace test +} // namespace onnxruntime From 033b6f95939795a1d15ea54e34e7cf4e634dcc55 Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 3 Oct 2025 12:51:14 -0700 Subject: [PATCH 08/13] used kOrtSessionOptionsDisableCPUEPFallback, cleanups, model is now over 2GB to show the proto limit (when the new logic for ext initializers is enabled, then the test passes) Signed-off-by: bfilipek --- .../openvino/openvino_ep_ext_init.cc | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index bdcfb4678f034..ae910c1e5ca9f 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -12,6 +12,7 @@ #include "gtest/gtest.h" #include "gmock/gmock.h" +#include "onnxruntime_session_options_config_keys.h" using namespace ONNX_NAMESPACE; using namespace onnxruntime::logging; @@ -61,21 +62,19 @@ namespace onnxruntime { namespace test { TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { - Ort::SessionOptions sessionOptions; - std::unordered_map ov_options; const auto& device = GetParam(); if (!ProbeDevice(device)) GTEST_SKIP() << device + " is not available on this machine"; // wait 7 second for debugger - std::cout << "Waiting 7 seconds for debugger to attach if needed..." 
<< std::endl; - std::this_thread::sleep_for(std::chrono::seconds(7)); + //std::cout << "Waiting 7 seconds for debugger to attach if needed..." << std::endl; + //std::this_thread::sleep_for(std::chrono::seconds(7)); // Model and weights file paths const std::string model_path = "ovep_ext_init_test.onnx"; const std::string weights_path = "ovep_ext_init_test.onnx.data"; - const size_t num_initializers = 4; - const size_t floats_per_initializer = 2 * 1024 * 1024; // 2 millions floats per initializer, 8MB bytes + const size_t num_initializers = 8; + const size_t floats_per_initializer = 64 * 1024 * 1024; // 64 millions floats per initializer, 256MB const size_t total_floats = num_initializers * floats_per_initializer; const size_t total_bytes = total_floats * sizeof(float); // min size threshold for new logic with ext initializers @@ -180,16 +179,18 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { // 6. Set up session options with OpenVINO Ort::SessionOptions session_options; - // session_options.SetIntraOpNumThreads(1); - session_options.SetLogSeverityLevel(0); // verbose... - session_options.AppendExecutionProvider("OpenVINO", { {"device_type", device.c_str()} }); + session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1"); + session_options.SetIntraOpNumThreads(1); + //session_options.SetLogSeverityLevel(0); // verbose... + std::unordered_map ov_options = { {"device_type", device } }; + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); session_options.AddExternalInitializersFromFilesInMemory(names_w, buffers, buffer_sizes); // 7. Create session from memory Ort::Session session(*ort_env, model_data.data(), model_data.size(), session_options); // 8. 
Run inference to verify weights are loaded - std::vector input_data(floats_per_initializer, 2.0f); // shape [4194304] + std::vector input_data(floats_per_initializer, 2.0f); std::vector input_shape = {static_cast(floats_per_initializer)}; Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtDeviceAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor(mem_info, input_data.data(), input_data.size(), input_shape.data(), input_shape.size()); @@ -216,7 +217,7 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { } INSTANTIATE_TEST_SUITE_P(OVEP_Tests, OVEP_ExtInit_Tests, - ::testing::Values(/*"CPU",*/ "GPU"/*"NPU"*/)); + ::testing::Values("CPU", "GPU", "NPU")); } // namespace test } // namespace onnxruntime From d4e41c9b8fb8e727c25bff12a7531e765a545ccd Mon Sep 17 00:00:00 2001 From: bfilipek Date: Fri, 3 Oct 2025 14:02:14 -0700 Subject: [PATCH 09/13] address code review comments Signed-off-by: bfilipek --- .../test/providers/openvino/openvino_ep_ext_init.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index ae910c1e5ca9f..3a5ef0f7b5566 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -65,10 +65,6 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { const auto& device = GetParam(); if (!ProbeDevice(device)) GTEST_SKIP() << device + " is not available on this machine"; - - // wait 7 second for debugger - //std::cout << "Waiting 7 seconds for debugger to attach if needed..." 
<< std::endl; - //std::this_thread::sleep_for(std::chrono::seconds(7)); // Model and weights file paths const std::string model_path = "ovep_ext_init_test.onnx"; @@ -207,8 +203,8 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { for (size_t i = 0; i < num_initializers; ++i) { expected += initializer_data[i][0]; } - // Check first 10 elements - for (size_t i = 0; i < std::min(10, floats_per_initializer); ++i) + + for (size_t i = 0; i < floats_per_initializer; ++i) ASSERT_FLOAT_EQ(out_data[i], expected); // Cleanup From cba53b33f1b092e0405c7fd61098e62d658b617b Mon Sep 17 00:00:00 2001 From: MayureshV1 <47039074+MayureshV1@users.noreply.github.com> Date: Fri, 3 Oct 2025 17:18:40 -0700 Subject: [PATCH 10/13] Update onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index 3a5ef0f7b5566..8dc4c34052de0 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -177,7 +177,6 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { Ort::SessionOptions session_options; session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1"); session_options.SetIntraOpNumThreads(1); - //session_options.SetLogSeverityLevel(0); // verbose... 
std::unordered_map ov_options = { {"device_type", device } }; session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); session_options.AddExternalInitializersFromFilesInMemory(names_w, buffers, buffer_sizes); From 8a5fe0e4600fad99198da834bb64a55ee1dfc4bb Mon Sep 17 00:00:00 2001 From: bfilipek Date: Mon, 6 Oct 2025 00:38:32 -0700 Subject: [PATCH 11/13] fix the Linux CI build, use PathString rather than wstring Signed-off-by: bfilipek --- .../test/providers/openvino/openvino_ep_ext_init.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index 8dc4c34052de0..8f9181334f9b4 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -37,10 +37,6 @@ std::vector LoadFileToMemory(const std::string& path) { return buffer; } -std::wstring StringToWstring(const std::string& str) { - return std::wstring(str.begin(), str.end()); -} - auto ProbeDevice(const std::string& device) { static std::map is_present; if (is_present.find(device) == is_present.end()) { @@ -168,8 +164,8 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { std::vector weights_data = LoadFileToMemory(weights_path); // 5. 
Prepare external initializer info - std::wstring weights_name_w = StringToWstring(std::filesystem::path(weights_path).filename().string()); - std::vector names_w = {weights_name_w}; + PathString weights_name_path(weights_path.begin(), weights_path.end()); + std::vector names_path = {weights_name_path}; std::vector buffers = {reinterpret_cast(weights_data.data())}; std::vector buffer_sizes = {weights_data.size()}; @@ -179,7 +175,7 @@ TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { session_options.SetIntraOpNumThreads(1); std::unordered_map ov_options = { {"device_type", device } }; session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); - session_options.AddExternalInitializersFromFilesInMemory(names_w, buffers, buffer_sizes); + session_options.AddExternalInitializersFromFilesInMemory(names_path, buffers, buffer_sizes); // 7. Create session from memory Ort::Session session(*ort_env, model_data.data(), model_data.size(), session_options); From 2a0fa0aadcb5e4927f5c3dd4eecfdcdc2a8a9279 Mon Sep 17 00:00:00 2001 From: bfilipek Date: Mon, 6 Oct 2025 05:34:02 -0700 Subject: [PATCH 12/13] As agreed, disable the test as it requires OV 2025.4, while the current CI version is only 2025.2 Signed-off-by: bfilipek --- onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index 8f9181334f9b4..344189f4214d2 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -57,7 +57,7 @@ auto ProbeDevice(const std::string& device) { namespace onnxruntime { namespace test { -TEST_P(OVEP_ExtInit_Tests, ModelFromExtInit) { +TEST_P(OVEP_ExtInit_Tests, DISABLED_ModelFromExtInit) { const auto& device = GetParam(); if (!ProbeDevice(device)) GTEST_SKIP() << device + " is not available on this machine"; From 
a3457378157dcf7f05e88f652a38b2b2d830d222 Mon Sep 17 00:00:00 2001 From: bfilipek Date: Mon, 6 Oct 2025 05:56:09 -0700 Subject: [PATCH 13/13] add missing comment Signed-off-by: bfilipek --- onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc index 344189f4214d2..21ec61c2d2e3f 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -57,6 +57,7 @@ auto ProbeDevice(const std::string& device) { namespace onnxruntime { namespace test { +// this test requires OV 2025.4+ to run, currently CI uses OV 2025.2, so the test will be disabled until OV is updated TEST_P(OVEP_ExtInit_Tests, DISABLED_ModelFromExtInit) { const auto& device = GetParam(); if (!ProbeDevice(device))