diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 99f28439db53a..91ed230ea528f 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -21,6 +21,7 @@ #include "core/providers/openvino/ov_versions/capability.h" #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" +#include "../../framework/tensorprotoutils.h" namespace onnxruntime { namespace openvino_ep { @@ -453,6 +454,78 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on #endif } +static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; + auto* external_data = proto_init->mutable_external_data(); + bool found_location = false, found_offset = false, found_length = false; + const int ext_data_size = external_data->size(); + proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); + + for (int j = 0; j < ext_data_size; ++j) { + auto& ext_entry = external_data->at(j); + auto& key = *ext_entry.mutable_key(); + if (key == "location") { + *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + found_location = true; + } else if (key == "offset") { + *ext_entry.mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + found_offset = true; + } else if (key == "length") { + *ext_entry.mutable_value() = std::to_string(data_size); + found_length = true; + } + } + + if (!found_location) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "location"; + *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + } + if (!found_offset) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "offset"; + *new_entry->mutable_value() = std::to_string(reinterpret_cast(data_ptr)); + } + if (!found_length) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "length"; + *new_entry->mutable_value() = std::to_string(data_size); + } +} + +static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { + // Remove constness as we need to use mutable_external_data() to get the entries to read. + // The entries themselves are not modified... + auto& mutable_proto = *const_cast(src_init); + auto* entry_protos = mutable_proto.mutable_external_data(); + for (int i = 0; i < entry_protos->size(); i++) { + auto& string_entry_proto{entry_protos->at(i)}; + const auto& pb_key{*(string_entry_proto.mutable_key())}; + const auto& pb_value{*(string_entry_proto.mutable_value())}; + if (pb_key == "location") { + location = pb_value; + } else if (pb_key == "offset") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid offset field: " + << src_init->name() << "], location: " << location + << ", offset: " << pb_value; + ORT_THROW(err_msg.str()); + } + } else if (pb_key == "length") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid length field: " + << src_init->name() << "], location: " << location + << ", length: " << pb_value; + ORT_THROW(err_msg.str()); + } + } + } +} + std::unique_ptr BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, @@ -529,12 +602,94 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; + + const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable { + auto allInitializers = subgraph.GetAllInitializedTensors(); + for (auto& [name, tp] : allInitializers) { + if (utils::HasExternalDataInMemory(*tp)) { + ++cnt; + } + } + return cnt; + }(); + + // when we have external weights in memory, the model proto will actually embed those + // and bloat the serialized string. We can avoid that by not including the data in the proto + // but then we have to update those initializers and set the external_data fields to mem_addr tag... + // 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1); + auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - subgraph.ToProto(*model_proto->mutable_graph(), true, true); + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, + /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); + print_model_proto_duration(); DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); + + // new code: + if (!include_initializer_data_in_proto) + { + LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..."; + const auto& allInitializers = subgraph.GetAllInitializedTensors(); + auto* graph_proto = model_proto->mutable_graph(); + auto* proto_initializers = graph_proto->mutable_initializer(); + + // Build a map for quick lookup by name + std::unordered_map proto_initializer_map; + for (int i = 0, n = proto_initializers->size(); i < n; ++i) { + auto& proto_init = proto_initializers->at(i); + proto_initializer_map[proto_init.name()] = &proto_init; + } + + for (const auto& init_entry : allInitializers) { + const std::string& name = init_entry.first; + const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second; + + auto it = proto_initializer_map.find(name); + if (it == proto_initializer_map.end()) + continue; + + auto* proto_init = it->second; + + // If the proto initializer is missing data, fill it in + if (!proto_init->has_raw_data() && src_init->has_raw_data()) { + *proto_init->mutable_raw_data() = src_init->raw_data(); + } + + // Only set in-memory external_data fields if the data is in memory + if (src_init->has_raw_data()) { + LOGS(logger, VERBOSE) << "In-memory initializer RAW: " + << src_init->name() + << ", data_type: " << src_init->data_type() + << ", raw_data size: " << src_init->raw_data().size(); + + SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); + } + else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { + std::string location; + size_t offset = 0; + size_t length = 0; + ReadExternalDataFields(src_init, location, offset, length); + + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " + << src_init->name() + << ", size: " << length; + + SetExternalDataFields(proto_init, (const void*)offset, length); + } + else { + // Debug info for file-based initializers + LOGS(logger, VERBOSE)<< "File-based initializer: " + << src_init->name() + << ", data_type: " << src_init->data_type(); + } + + } + + } + return model_proto; } }