@@ -61,6 +61,13 @@ ov::Tensor make_tensor_from_vector(std::vector<uint8_t>& vector) {
6161 return ov::make_tensor (impl);
6262}
6363
64+ bool isInitMetadata (const intel_npu::NetworkMetadata& networkMetadata) {
65+ if (networkMetadata.inputs .size () == 0 ) {
66+ return false ;
67+ }
68+ return networkMetadata.inputs .at (0 ).isInitInputWeights ;
69+ }
70+
6471} // namespace
6572
6673namespace intel_npu {
@@ -156,24 +163,14 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
156163std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS (const std::shared_ptr<ov::Model>& model,
157164 const FilteredConfig& config) const {
158165 OV_ITT_TASK_CHAIN (COMPILE_BLOB, itt::domains::NPUPlugin, " PluginCompilerAdapter" , " compileWS" );
166+ OPENVINO_ASSERT (_zeGraphExt);
159167
160168 std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
161169 std::shared_ptr<NetworkDescription> mainNetworkDescription;
162170
163171 _logger.debug (" compile start" );
164172
165- const auto starts_with = [](const std::string& str, const std::string& prefix) {
166- return str.substr (0 , prefix.size ()) == prefix;
167- };
168- const auto isInit = [&](std::string name) {
169- return starts_with (name, " init" );
170- };
171-
172- const auto isMain = [&](std::string name) {
173- return starts_with (name, " main" );
174- };
175-
176- Config localConfig = config;
173+ FilteredConfig localConfig = config;
177174 if (!localConfig.has <SEPARATE_WEIGHTS_VERSION>()) {
178175 localConfig.update ({{ov::intel_npu::separate_weights_version.name (), " ONE_SHOT" }});
179176 }
@@ -185,6 +182,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
185182 if (_logger.level () >= ov::log::Level::INFO) {
186183 compile_model_mem_start = get_peak_memory_usage ();
187184 }
185+
186+ std::vector<GraphDescriptor> initGraphDescriptors;
187+ std::vector<ov::Tensor> tensorsInits;
188+ std::vector<NetworkMetadata> initNetworkMetadata;
189+
190+ ov::Tensor tensorMain;
191+ GraphDescriptor mainGraphDesc;
192+ NetworkMetadata mainNetworkMetadata;
193+
188194 switch (localConfig.get <SEPARATE_WEIGHTS_VERSION>()) {
189195 case ov::intel_npu::WSVersion::ONE_SHOT: {
190196 std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
@@ -199,6 +205,44 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
199205 mainNetworkDescription = initMainNetworkDescriptions.back ();
200206 initMainNetworkDescriptions.pop_back ();
201207 initNetworkDescriptions = std::move (initMainNetworkDescriptions);
208+
209+ ov::Tensor tensorMain = make_tensor_from_vector (mainNetworkDescription->compiledNetwork );
210+ GraphDescriptor mainGraphDesc;
211+ NetworkMetadata mainNetworkMetadata;
212+ if (_zeGraphExt) {
213+ // Depending on the config, we may get an error when trying to
214+ // get the graph handle from the compiled network
215+ try {
216+ mainGraphDesc = _zeGraphExt->getGraphDescriptor (tensorMain.data (), tensorMain.get_byte_size ());
217+ mainNetworkMetadata = _zeGraphExt->getNetworkMeta (mainGraphDesc);
218+ } catch (...) {
219+ _logger.info (" Failed to obtain the level zero graph handle. Inference requests for this model are not "
220+ " allowed. Only exports are available" );
221+ }
222+ }
223+
224+ std::vector<GraphDescriptor> initGraphDescriptors;
225+ std::vector<ov::Tensor> tensorsInits;
226+ std::vector<NetworkMetadata> initNetworkMetadata;
227+ initGraphDescriptors.reserve (initNetworkDescriptions.size ());
228+ tensorsInits.reserve (initNetworkDescriptions.size ());
229+ initNetworkMetadata.reserve (initNetworkDescriptions.size ());
230+ for (auto & networkDesc : initNetworkDescriptions) {
231+ ov::Tensor tensor = make_tensor_from_vector (networkDesc->compiledNetwork );
232+ GraphDescriptor initGraphDesc;
233+ NetworkMetadata initNetworkMeta;
234+ if (_zeGraphExt) {
235+ try {
236+ initGraphDesc = _zeGraphExt->getGraphDescriptor (tensor.data (), tensor.get_byte_size ());
237+ initNetworkMeta = _zeGraphExt->getNetworkMeta (initGraphDesc);
238+ } catch (...) {
239+ }
240+ }
241+
242+ initGraphDescriptors.push_back (initGraphDesc);
243+ tensorsInits.push_back (std::move (tensor));
244+ initNetworkMetadata.push_back (std::move (initNetworkMeta));
245+ }
202246 } break ;
203247 case ov::intel_npu::WSVersion::ITERATIVE: {
204248 const std::shared_ptr<ov::Model> originalModel = model->clone ();
@@ -207,16 +251,23 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
207251
208252 while (auto networkDescription =
209253 std::make_shared<NetworkDescription>(_compiler->compileWsIterative (targetModel, localConfig, i++))) {
210- if (isInit (networkDescription->metadata .name )) {
254+ ov::Tensor tensor = make_tensor_from_vector (networkDescription->compiledNetwork );
255+ GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor (tensor.data (), tensor.get_byte_size ());
256+ NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta (graphDesc);
257+
258+ if (isInitMetadata (networkDescription->metadata )) {
211259 initNetworkDescriptions.push_back (networkDescription);
212260 targetModel = originalModel->clone ();
261+ initGraphDescriptors.push_back (graphDesc);
262+ tensorsInits.push_back (std::move (tensor));
263+ initNetworkMetadata.push_back (std::move (networkMetadata));
213264 continue ;
214265 }
215- OPENVINO_ASSERT (isMain (networkDescription->metadata .name ),
216- " Unexpected network name: " ,
217- networkDescription->metadata .name );
218266
219267 mainNetworkDescription = std::move (networkDescription);
268+ tensorMain = std::move (tensor);
269+ mainGraphDesc = graphDesc;
270+ mainNetworkMetadata = std::move (networkMetadata);
220271 break ;
221272 }
222273 } break ;
@@ -236,44 +287,6 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
236287
237288 _logger.debug (" compile end" );
238289
239- ov::Tensor tensorMain = make_tensor_from_vector (mainNetworkDescription->compiledNetwork );
240- GraphDescriptor mainGraphDesc;
241- NetworkMetadata mainNetworkMetadata;
242- if (_zeGraphExt) {
243- // Depending on the config, we may get an error when trying to
244- // get the graph handle from the compiled network
245- try {
246- mainGraphDesc = _zeGraphExt->getGraphDescriptor (tensorMain.data (), tensorMain.get_byte_size ());
247- mainNetworkMetadata = _zeGraphExt->getNetworkMeta (mainGraphDesc);
248- } catch (...) {
249- _logger.info (" Failed to obtain the level zero graph handle. Inference requests for this model are not "
250- " allowed. Only exports are available" );
251- }
252- }
253-
254- std::vector<GraphDescriptor> initGraphDescriptors;
255- std::vector<ov::Tensor> tensorsInits;
256- std::vector<NetworkMetadata> initNetworkMetadata;
257- initGraphDescriptors.reserve (initNetworkDescriptions.size ());
258- tensorsInits.reserve (initNetworkDescriptions.size ());
259- initNetworkMetadata.reserve (initNetworkDescriptions.size ());
260- for (auto & networkDesc : initNetworkDescriptions) {
261- ov::Tensor tensor = make_tensor_from_vector (networkDesc->compiledNetwork );
262- GraphDescriptor initGraphDesc;
263- NetworkMetadata initNetworkMeta;
264- if (_zeGraphExt) {
265- try {
266- initGraphDesc = _zeGraphExt->getGraphDescriptor (tensor.data (), tensor.get_byte_size ());
267- initNetworkMeta = _zeGraphExt->getNetworkMeta (initGraphDesc);
268- } catch (...) {
269- }
270- }
271-
272- initGraphDescriptors.push_back (initGraphDesc);
273- tensorsInits.push_back (std::move (tensor));
274- initNetworkMetadata.push_back (std::move (initNetworkMeta));
275- }
276-
277290 return std::make_shared<WeightlessGraph>(
278291 _zeGraphExt,
279292 _zeroInitStruct,
0 commit comments