Skip to content

Commit e653477

Browse files
razvanapetroaie authored and DanLiu2Intel committed
Starting the one shot implementation & adjusting the code a bit
1 parent d3aea11 commit e653477

File tree

3 files changed

+66
-19
lines changed

3 files changed

+66
-19
lines changed

src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ class VCLCompilerImpl final : public intel_npu::ICompiler {
9393

9494
NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
9595

96-
// std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
97-
// const Config& config) const override;
96+
std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
97+
const Config& config) const override;
9898

9999
NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
100100
const Config& config,

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,6 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
165165
OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compileWS");
166166
OPENVINO_ASSERT(_zeGraphExt);
167167

168-
std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
169-
std::shared_ptr<NetworkDescription> mainNetworkDescription;
170-
171168
_logger.debug("compile start");
172169

173170
FilteredConfig localConfig = config;
@@ -186,29 +183,23 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
186183
std::vector<GraphDescriptor> initGraphDescriptors;
187184
std::vector<ov::Tensor> tensorsInits;
188185
std::vector<NetworkMetadata> initNetworkMetadata;
186+
std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
189187

190188
ov::Tensor tensorMain;
191189
GraphDescriptor mainGraphDesc;
192190
NetworkMetadata mainNetworkMetadata;
191+
std::shared_ptr<NetworkDescription> mainNetworkDescription;
193192

194193
switch (localConfig.get<SEPARATE_WEIGHTS_VERSION>()) {
195194
case ov::intel_npu::WSVersion::ONE_SHOT: {
196195
std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
197196
_compiler->compileWsOneShot(model, localConfig);
198197

199-
#if 0 // TODO: it is not clear whether we should change the name
200-
OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name),
201-
"Unexpected network name for main:",
202-
initMainNetworkDescriptions.back()->metadata.name);
203-
#endif
204-
205198
mainNetworkDescription = initMainNetworkDescriptions.back();
206199
initMainNetworkDescriptions.pop_back();
207200
initNetworkDescriptions = std::move(initMainNetworkDescriptions);
208201

209-
ov::Tensor tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
210-
GraphDescriptor mainGraphDesc;
211-
NetworkMetadata mainNetworkMetadata;
202+
tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
212203
if (_zeGraphExt) {
213204
// Depending on the config, we may get an error when trying to
214205
// get the graph handle from the compiled network
@@ -221,9 +212,6 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
221212
}
222213
}
223214

224-
std::vector<GraphDescriptor> initGraphDescriptors;
225-
std::vector<ov::Tensor> tensorsInits;
226-
std::vector<NetworkMetadata> initNetworkMetadata;
227215
initGraphDescriptors.reserve(initNetworkDescriptions.size());
228216
tensorsInits.reserve(initNetworkDescriptions.size());
229217
initNetworkMetadata.reserve(initNetworkDescriptions.size());
@@ -245,6 +233,11 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
245233
}
246234
} break;
247235
case ov::intel_npu::WSVersion::ITERATIVE: {
236+
OPENVINO_ASSERT(_zeGraphExt,
237+
"The \"iterative\" implementation of the weights separation feature requires a Level Zero "
238+
"graph handle to compile a model.");
239+
240+
// The state of the model needs to be reset every iteration
248241
const std::shared_ptr<ov::Model> originalModel = model->clone();
249242
std::shared_ptr<ov::Model> targetModel = model;
250243
size_t i = 0;
@@ -256,18 +249,18 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
256249
NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);
257250

258251
if (isInitMetadata(networkDescription->metadata)) {
259-
initNetworkDescriptions.push_back(networkDescription);
260252
targetModel = originalModel->clone();
261253
initGraphDescriptors.push_back(graphDesc);
262254
tensorsInits.push_back(std::move(tensor));
263255
initNetworkMetadata.push_back(std::move(networkMetadata));
256+
initNetworkDescriptions.push_back(networkDescription);
264257
continue;
265258
}
266259

267-
mainNetworkDescription = std::move(networkDescription);
268260
tensorMain = std::move(tensor);
269261
mainGraphDesc = graphDesc;
270262
mainNetworkMetadata = std::move(networkMetadata);
263+
mainNetworkDescription = std::move(networkDescription);
271264
break;
272265
}
273266
} break;

src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,60 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
359359
}
360360
}
361361

362+
std::vector<std::shared_ptr<NetworkDescription>> VCLCompilerImpl::compileWsOneShot(
363+
const std::shared_ptr<ov::Model>& model,
364+
const Config& config) const {
365+
_logger.debug("compileWsOneShot start");
366+
367+
const auto maxOpsetVersion = _compilerProperties.supportedOpsets;
368+
_logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);
369+
370+
_logger.debug("serialize IR");
371+
ze_graph_compiler_version_info_t compilerVersion;
372+
compilerVersion.major = _compilerProperties.version.major;
373+
compilerVersion.minor = _compilerProperties.version.minor;
374+
375+
const FilteredConfig* filteredConfig = dynamic_cast<const FilteredConfig*>(&config);
376+
if (filteredConfig == nullptr) {
377+
OPENVINO_THROW("config is not FilteredConfig");
378+
}
379+
FilteredConfig updatedConfig = *filteredConfig;
380+
auto serializedIR =
381+
driver_compiler_utils::serializeIR(model,
382+
compilerVersion,
383+
maxOpsetVersion,
384+
updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())
385+
? updatedConfig.get<USE_BASE_MODEL_SERIALIZER>()
386+
: true,
387+
updatedConfig.get<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD>());
388+
389+
std::string buildFlags;
390+
const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9));
391+
392+
_logger.debug("create build flags");
393+
buildFlags += driver_compiler_utils::serializeIOInfo(model, useIndices);
394+
buildFlags += " ";
395+
buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion);
396+
_logger.debug("final build flags to compiler: %s", buildFlags.c_str());
397+
398+
vcl_executable_desc_t exeDesc = {serializedIR.second.get(),
399+
serializedIR.first,
400+
buildFlags.c_str(),
401+
buildFlags.size()};
402+
_logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor);
403+
404+
_logger.debug("Using vclAllocatedExecutableCreateWS");
405+
vcl_allocator_vector allocator;
406+
uint8_t* blob = nullptr;
407+
size_t size = 0;
408+
409+
// TODO fill the rest. Call "vclAllocatedExecutableCreateWS" and any other remote function required to retrieve the
410+
// vector of blobs and use them to construct the vector of "NetworkDescription". The metadata objects can be empty.
411+
412+
_logger.debug("compile end, blob size:%d", allocator.m_vec.size());
413+
return {};
414+
}
415+
362416
NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr<ov::Model>& model,
363417
const Config& config,
364418
size_t callNumber) const {

0 commit comments

Comments
 (0)