Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 156 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
#include "../../framework/tensorprotoutils.h"

namespace onnxruntime {
namespace openvino_ep {
Expand Down Expand Up @@ -453,6 +454,78 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
#endif
}

static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";
auto* external_data = proto_init->mutable_external_data();
bool found_location = false, found_offset = false, found_length = false;
const int ext_data_size = external_data->size();
proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);

for (int j = 0; j < ext_data_size; ++j) {
auto& ext_entry = external_data->at(j);
auto& key = *ext_entry.mutable_key();
if (key == "location") {
*ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
found_location = true;
} else if (key == "offset") {
*ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
found_offset = true;
} else if (key == "length") {
*ext_entry.mutable_value() = std::to_string(data_size);
found_length = true;
}
}

if (!found_location) {
auto* new_entry = external_data->Add();
*new_entry->mutable_key() = "location";
*new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
}
if (!found_offset) {
auto* new_entry = external_data->Add();
*new_entry->mutable_key() = "offset";
*new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
}
if (!found_length) {
auto* new_entry = external_data->Add();
*new_entry->mutable_key() = "length";
*new_entry->mutable_value() = std::to_string(data_size);
}
}

static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) {
// Remove constness as we need to use mutable_external_data() to get the entries to read.
Copy link
Preview

Copilot AI Sep 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a trailing space after the period in the comment on line 497.

Suggested change
// Remove constness as we need to use mutable_external_data() to get the entries to read.
// Remove constness as we need to use mutable_external_data() to get the entries to read.

Copilot uses AI. Check for mistakes.

// The entries themselves are not modified...
auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init);
auto* entry_protos = mutable_proto.mutable_external_data();
for (int i = 0; i < entry_protos->size(); i++) {
auto& string_entry_proto{entry_protos->at(i)};
const auto& pb_key{*(string_entry_proto.mutable_key())};
const auto& pb_value{*(string_entry_proto.mutable_value())};
if (pb_key == "location") {
location = pb_value;
} else if (pb_key == "offset") {
const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset);
if (res.ec != std::errc()) {
std::ostringstream err_msg;
err_msg << "External data in memory has invalid offset field: "
<< src_init->name() << "], location: " << location
<< ", offset: " << pb_value;
ORT_THROW(err_msg.str());
}
} else if (pb_key == "length") {
const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length);
if (res.ec != std::errc()) {
std::ostringstream err_msg;
err_msg << "External data in memory has invalid length field: "
<< src_init->name() << "], location: " << location
<< ", length: " << pb_value;
ORT_THROW(err_msg.str());
}
}
}
}

std::unique_ptr<ONNX_NAMESPACE::ModelProto>
BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
Expand Down Expand Up @@ -529,12 +602,94 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
return model_proto;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";

const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable {
auto allInitializers = subgraph.GetAllInitializedTensors();
for (auto& [name, tp] : allInitializers) {
if (utils::HasExternalDataInMemory(*tp)) {
++cnt;
}
}
return cnt;
}();

// when we have external weights in memory, the model proto will actually embed those
// and bloat the serialized string. We can avoid that by not including the data in the proto
// but then we have to update those initializers and set the external_data fields to mem_addr tag...
// 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is some basic test, to limit the use case for those ext initializers, but I'm open to suggestions. Maybe we even need a flag that is passed down from SessionOptions. If we want to enable this logic for all cases, then we might need some more testing.


auto model = subgraph.CreateModel(logger);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
subgraph.ToProto(*model_proto->mutable_graph(), true, true);
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto);

print_model_proto_duration();
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);

// new code:
if (!include_initializer_data_in_proto)
{
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata...";
const auto& allInitializers = subgraph.GetAllInitializedTensors();
auto* graph_proto = model_proto->mutable_graph();
auto* proto_initializers = graph_proto->mutable_initializer();

// Build a map for quick lookup by name
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
for (int i = 0, n = proto_initializers->size(); i < n; ++i) {
auto& proto_init = proto_initializers->at(i);
proto_initializer_map[proto_init.name()] = &proto_init;
}

for (const auto& init_entry : allInitializers) {
const std::string& name = init_entry.first;
const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second;

auto it = proto_initializer_map.find(name);
if (it == proto_initializer_map.end())
continue;

auto* proto_init = it->second;

// If the proto initializer is missing data, fill it in
if (!proto_init->has_raw_data() && src_init->has_raw_data()) {
*proto_init->mutable_raw_data() = src_init->raw_data();
}

// Only set in-memory external_data fields if the data is in memory
if (src_init->has_raw_data()) {
LOGS(logger, VERBOSE) << "In-memory initializer RAW: "
<< src_init->name()
<< ", data_type: " << src_init->data_type()
<< ", raw_data size: " << src_init->raw_data().size();

SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size());
}
else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) {
std::string location;
size_t offset = 0;
size_t length = 0;
ReadExternalDataFields(src_init, location, offset, length);

LOGS(logger, VERBOSE) << "In-memory initializer EXT: "
<< src_init->name()
<< ", size: " << length;

SetExternalDataFields(proto_init, (const void*)offset, length);
}
else {
// Debug info for file-based initializers
LOGS(logger, VERBOSE)<< "File-based initializer: "
Copy link
Preview

Copilot AI Sep 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space between LOGS(logger, VERBOSE) and << operator.

Suggested change
LOGS(logger, VERBOSE)<< "File-based initializer: "
LOGS(logger, VERBOSE) << "File-based initializer: "

Copilot uses AI. Check for mistakes.

<< src_init->name()
<< ", data_type: " << src_init->data_type();
}

}

}

return model_proto;
}
}
Expand Down