-
Notifications
You must be signed in to change notification settings - Fork 53
Don't embed external initializers into the proto to avoid 2GB limit #817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: ovep-develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -21,6 +21,7 @@ | |||||
#include "core/providers/openvino/ov_versions/capability.h" | ||||||
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h" | ||||||
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" | ||||||
#include "../../framework/tensorprotoutils.h" | ||||||
|
||||||
namespace onnxruntime { | ||||||
namespace openvino_ep { | ||||||
|
@@ -453,6 +454,78 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on | |||||
#endif | ||||||
} | ||||||
|
||||||
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { | ||||||
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; | ||||||
auto* external_data = proto_init->mutable_external_data(); | ||||||
bool found_location = false, found_offset = false, found_length = false; | ||||||
const int ext_data_size = external_data->size(); | ||||||
proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); | ||||||
|
||||||
for (int j = 0; j < ext_data_size; ++j) { | ||||||
auto& ext_entry = external_data->at(j); | ||||||
auto& key = *ext_entry.mutable_key(); | ||||||
if (key == "location") { | ||||||
*ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; | ||||||
found_location = true; | ||||||
} else if (key == "offset") { | ||||||
*ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); | ||||||
found_offset = true; | ||||||
} else if (key == "length") { | ||||||
*ext_entry.mutable_value() = std::to_string(data_size); | ||||||
found_length = true; | ||||||
} | ||||||
} | ||||||
|
||||||
if (!found_location) { | ||||||
auto* new_entry = external_data->Add(); | ||||||
*new_entry->mutable_key() = "location"; | ||||||
*new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; | ||||||
} | ||||||
if (!found_offset) { | ||||||
auto* new_entry = external_data->Add(); | ||||||
*new_entry->mutable_key() = "offset"; | ||||||
*new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); | ||||||
} | ||||||
if (!found_length) { | ||||||
auto* new_entry = external_data->Add(); | ||||||
*new_entry->mutable_key() = "length"; | ||||||
*new_entry->mutable_value() = std::to_string(data_size); | ||||||
} | ||||||
} | ||||||
|
||||||
static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { | ||||||
// Remove constness as we need to use mutable_external_data() to get the entries to read. | ||||||
// The entries themselves are not modified... | ||||||
auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init); | ||||||
auto* entry_protos = mutable_proto.mutable_external_data(); | ||||||
for (int i = 0; i < entry_protos->size(); i++) { | ||||||
auto& string_entry_proto{entry_protos->at(i)}; | ||||||
const auto& pb_key{*(string_entry_proto.mutable_key())}; | ||||||
const auto& pb_value{*(string_entry_proto.mutable_value())}; | ||||||
if (pb_key == "location") { | ||||||
location = pb_value; | ||||||
} else if (pb_key == "offset") { | ||||||
const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); | ||||||
if (res.ec != std::errc()) { | ||||||
std::ostringstream err_msg; | ||||||
err_msg << "External data in memory has invalid offset field: " | ||||||
<< src_init->name() << "], location: " << location | ||||||
<< ", offset: " << pb_value; | ||||||
ORT_THROW(err_msg.str()); | ||||||
} | ||||||
} else if (pb_key == "length") { | ||||||
const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); | ||||||
if (res.ec != std::errc()) { | ||||||
std::ostringstream err_msg; | ||||||
err_msg << "External data in memory has invalid length field: " | ||||||
<< src_init->name() << "], location: " << location | ||||||
<< ", length: " << pb_value; | ||||||
ORT_THROW(err_msg.str()); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
std::unique_ptr<ONNX_NAMESPACE::ModelProto> | ||||||
BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | ||||||
const onnxruntime::GraphViewer& subgraph, | ||||||
|
@@ -529,12 +602,94 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | |||||
return model_proto; | ||||||
} else { | ||||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; | ||||||
|
||||||
const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable { | ||||||
auto allInitializers = subgraph.GetAllInitializedTensors(); | ||||||
for (auto& [name, tp] : allInitializers) { | ||||||
if (utils::HasExternalDataInMemory(*tp)) { | ||||||
++cnt; | ||||||
} | ||||||
} | ||||||
return cnt; | ||||||
}(); | ||||||
|
||||||
// when we have external weights in memory, the model proto will actually embed those | ||||||
// and bloat the serialized string. We can avoid that by not including the data in the proto | ||||||
// but then we have to update those initializers and set the external_data fields to mem_addr tag... | ||||||
// 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort | ||||||
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is some basic test, to limit the use case for those ext initializers, but I'm open to suggestions. Maybe we even need a flag that is passed down from |
||||||
|
||||||
auto model = subgraph.CreateModel(logger); | ||||||
auto model_proto = model->ToProto(); | ||||||
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); | ||||||
subgraph.ToProto(*model_proto->mutable_graph(), true, true); | ||||||
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, | ||||||
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); | ||||||
|
||||||
print_model_proto_duration(); | ||||||
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); | ||||||
|
||||||
// new code: | ||||||
if (!include_initializer_data_in_proto) | ||||||
{ | ||||||
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..."; | ||||||
const auto& allInitializers = subgraph.GetAllInitializedTensors(); | ||||||
auto* graph_proto = model_proto->mutable_graph(); | ||||||
auto* proto_initializers = graph_proto->mutable_initializer(); | ||||||
|
||||||
// Build a map for quick lookup by name | ||||||
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map; | ||||||
for (int i = 0, n = proto_initializers->size(); i < n; ++i) { | ||||||
auto& proto_init = proto_initializers->at(i); | ||||||
proto_initializer_map[proto_init.name()] = &proto_init; | ||||||
} | ||||||
|
||||||
for (const auto& init_entry : allInitializers) { | ||||||
const std::string& name = init_entry.first; | ||||||
const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second; | ||||||
|
||||||
auto it = proto_initializer_map.find(name); | ||||||
if (it == proto_initializer_map.end()) | ||||||
continue; | ||||||
|
||||||
auto* proto_init = it->second; | ||||||
|
||||||
// If the proto initializer is missing data, fill it in | ||||||
if (!proto_init->has_raw_data() && src_init->has_raw_data()) { | ||||||
*proto_init->mutable_raw_data() = src_init->raw_data(); | ||||||
} | ||||||
|
||||||
// Only set in-memory external_data fields if the data is in memory | ||||||
if (src_init->has_raw_data()) { | ||||||
LOGS(logger, VERBOSE) << "In-memory initializer RAW: " | ||||||
<< src_init->name() | ||||||
<< ", data_type: " << src_init->data_type() | ||||||
<< ", raw_data size: " << src_init->raw_data().size(); | ||||||
|
||||||
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); | ||||||
} | ||||||
else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { | ||||||
std::string location; | ||||||
size_t offset = 0; | ||||||
size_t length = 0; | ||||||
ReadExternalDataFields(src_init, location, offset, length); | ||||||
|
||||||
LOGS(logger, VERBOSE) << "In-memory initializer EXT: " | ||||||
<< src_init->name() | ||||||
<< ", size: " << length; | ||||||
|
||||||
SetExternalDataFields(proto_init, (const void*)offset, length); | ||||||
} | ||||||
else { | ||||||
// Debug info for file-based initializers | ||||||
LOGS(logger, VERBOSE)<< "File-based initializer: " | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing space between
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
<< src_init->name() | ||||||
<< ", data_type: " << src_init->data_type(); | ||||||
} | ||||||
|
||||||
} | ||||||
|
||||||
} | ||||||
|
||||||
return model_proto; | ||||||
} | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a trailing space after the period in the comment on line 497.
Copilot uses AI. Check for mistakes.