Skip to content

Commit d3aea11

Browse files
razvanapetroaie authored and DanLiu2Intel committed
WS iterative working
1 parent d62b895 commit d3aea11

File tree

3 files changed

+87
-54
lines changed

3 files changed

+87
-54
lines changed

src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,13 @@ class VCLCompilerImpl final : public intel_npu::ICompiler {
9393

9494
NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
9595

96+
// std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
97+
// const Config& config) const override;
98+
99+
NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
100+
const Config& config,
101+
size_t callNumber) const override;
102+
96103
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
97104

98105
NetworkMetadata parse(const std::vector<uint8_t>& network, const Config& config) const override;

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

Lines changed: 67 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ ov::Tensor make_tensor_from_vector(std::vector<uint8_t>& vector) {
6161
return ov::make_tensor(impl);
6262
}
6363

64+
// Reports the isInitInputWeights flag of the first input descriptor.
// Metadata that has no inputs at all yields false.
bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) {
    const auto& inputs = networkMetadata.inputs;
    return !inputs.empty() && inputs.front().isInitInputWeights;
}
70+
6471
} // namespace
6572

6673
namespace intel_npu {
@@ -156,24 +163,14 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
156163
std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
157164
const FilteredConfig& config) const {
158165
OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compileWS");
166+
OPENVINO_ASSERT(_zeGraphExt);
159167

160168
std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
161169
std::shared_ptr<NetworkDescription> mainNetworkDescription;
162170

163171
_logger.debug("compile start");
164172

165-
const auto starts_with = [](const std::string& str, const std::string& prefix) {
166-
return str.substr(0, prefix.size()) == prefix;
167-
};
168-
const auto isInit = [&](std::string name) {
169-
return starts_with(name, "init");
170-
};
171-
172-
const auto isMain = [&](std::string name) {
173-
return starts_with(name, "main");
174-
};
175-
176-
Config localConfig = config;
173+
FilteredConfig localConfig = config;
177174
if (!localConfig.has<SEPARATE_WEIGHTS_VERSION>()) {
178175
localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}});
179176
}
@@ -185,6 +182,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
185182
if (_logger.level() >= ov::log::Level::INFO) {
186183
compile_model_mem_start = get_peak_memory_usage();
187184
}
185+
186+
std::vector<GraphDescriptor> initGraphDescriptors;
187+
std::vector<ov::Tensor> tensorsInits;
188+
std::vector<NetworkMetadata> initNetworkMetadata;
189+
190+
ov::Tensor tensorMain;
191+
GraphDescriptor mainGraphDesc;
192+
NetworkMetadata mainNetworkMetadata;
193+
188194
switch (localConfig.get<SEPARATE_WEIGHTS_VERSION>()) {
189195
case ov::intel_npu::WSVersion::ONE_SHOT: {
190196
std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
@@ -199,6 +205,44 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
199205
mainNetworkDescription = initMainNetworkDescriptions.back();
200206
initMainNetworkDescriptions.pop_back();
201207
initNetworkDescriptions = std::move(initMainNetworkDescriptions);
208+
209+
ov::Tensor tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
210+
GraphDescriptor mainGraphDesc;
211+
NetworkMetadata mainNetworkMetadata;
212+
if (_zeGraphExt) {
213+
// Depending on the config, we may get an error when trying to
214+
// get the graph handle from the compiled network
215+
try {
216+
mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
217+
mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
218+
} catch (...) {
219+
_logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
220+
"allowed. Only exports are available");
221+
}
222+
}
223+
224+
std::vector<GraphDescriptor> initGraphDescriptors;
225+
std::vector<ov::Tensor> tensorsInits;
226+
std::vector<NetworkMetadata> initNetworkMetadata;
227+
initGraphDescriptors.reserve(initNetworkDescriptions.size());
228+
tensorsInits.reserve(initNetworkDescriptions.size());
229+
initNetworkMetadata.reserve(initNetworkDescriptions.size());
230+
for (auto& networkDesc : initNetworkDescriptions) {
231+
ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
232+
GraphDescriptor initGraphDesc;
233+
NetworkMetadata initNetworkMeta;
234+
if (_zeGraphExt) {
235+
try {
236+
initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
237+
initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
238+
} catch (...) {
239+
}
240+
}
241+
242+
initGraphDescriptors.push_back(initGraphDesc);
243+
tensorsInits.push_back(std::move(tensor));
244+
initNetworkMetadata.push_back(std::move(initNetworkMeta));
245+
}
202246
} break;
203247
case ov::intel_npu::WSVersion::ITERATIVE: {
204248
const std::shared_ptr<ov::Model> originalModel = model->clone();
@@ -207,16 +251,23 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
207251

208252
while (auto networkDescription =
209253
std::make_shared<NetworkDescription>(_compiler->compileWsIterative(targetModel, localConfig, i++))) {
210-
if (isInit(networkDescription->metadata.name)) {
254+
ov::Tensor tensor = make_tensor_from_vector(networkDescription->compiledNetwork);
255+
GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
256+
NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);
257+
258+
if (isInitMetadata(networkDescription->metadata)) {
211259
initNetworkDescriptions.push_back(networkDescription);
212260
targetModel = originalModel->clone();
261+
initGraphDescriptors.push_back(graphDesc);
262+
tensorsInits.push_back(std::move(tensor));
263+
initNetworkMetadata.push_back(std::move(networkMetadata));
213264
continue;
214265
}
215-
OPENVINO_ASSERT(isMain(networkDescription->metadata.name),
216-
"Unexpected network name: ",
217-
networkDescription->metadata.name);
218266

219267
mainNetworkDescription = std::move(networkDescription);
268+
tensorMain = std::move(tensor);
269+
mainGraphDesc = graphDesc;
270+
mainNetworkMetadata = std::move(networkMetadata);
220271
break;
221272
}
222273
} break;
@@ -236,44 +287,6 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
236287

237288
_logger.debug("compile end");
238289

239-
ov::Tensor tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
240-
GraphDescriptor mainGraphDesc;
241-
NetworkMetadata mainNetworkMetadata;
242-
if (_zeGraphExt) {
243-
// Depending on the config, we may get an error when trying to
244-
// get the graph handle from the compiled network
245-
try {
246-
mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
247-
mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
248-
} catch (...) {
249-
_logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
250-
"allowed. Only exports are available");
251-
}
252-
}
253-
254-
std::vector<GraphDescriptor> initGraphDescriptors;
255-
std::vector<ov::Tensor> tensorsInits;
256-
std::vector<NetworkMetadata> initNetworkMetadata;
257-
initGraphDescriptors.reserve(initNetworkDescriptions.size());
258-
tensorsInits.reserve(initNetworkDescriptions.size());
259-
initNetworkMetadata.reserve(initNetworkDescriptions.size());
260-
for (auto& networkDesc : initNetworkDescriptions) {
261-
ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
262-
GraphDescriptor initGraphDesc;
263-
NetworkMetadata initNetworkMeta;
264-
if (_zeGraphExt) {
265-
try {
266-
initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
267-
initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
268-
} catch (...) {
269-
}
270-
}
271-
272-
initGraphDescriptors.push_back(initGraphDesc);
273-
tensorsInits.push_back(std::move(tensor));
274-
initNetworkMetadata.push_back(std::move(initNetworkMeta));
275-
}
276-
277290
return std::make_shared<WeightlessGraph>(
278291
_zeGraphExt,
279292
_zeroInitStruct,

src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,19 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
359359
}
360360
}
361361

362+
/**
 * @brief Performs one call of the iterative weights-separation compilation.
 *
 * Copies the supplied configuration, stores the current call number in the copy under
 * ov::intel_npu::ws_compile_call_number, and compiles the model with that copy.
 *
 * @param model Model to compile during this call.
 * @param config Compilation configuration; its dynamic type must be FilteredConfig.
 * @param callNumber Index of this call within the iterative sequence, as provided by the caller.
 * @return The NetworkDescription returned by compile() for this call.
 * @note Raises an error through OPENVINO_THROW when config is not a FilteredConfig.
 */
NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr<ov::Model>& model,
                                                       const Config& config,
                                                       size_t callNumber) const {
    _logger.debug("compileWsIterative start");

    // update() is applied to a FilteredConfig copy, so the caller's config stays untouched.
    const auto* filteredConfig = dynamic_cast<const FilteredConfig*>(&config);
    if (filteredConfig == nullptr) {
        OPENVINO_THROW("config is not FilteredConfig");
    }

    FilteredConfig updatedConfig = *filteredConfig;
    updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), std::to_string(callNumber)}});

    // Compile with the updated copy; passing the original config would drop the call number.
    return compile(model, updatedConfig);
}
374+
362375
intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector<uint8_t>& network, const Config& config) const {
363376
_logger.debug("parse start");
364377
// VCL does not support parse, return empty metadata

0 commit comments

Comments
 (0)