Skip to content

Commit 83b1772

Browse files
Adding OV common passes on model importation
1 parent 579354f commit 83b1772

File tree

2 files changed

+162
-0
lines changed

2 files changed

+162
-0
lines changed

src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,13 @@ std::vector<std::shared_ptr<IGraph>> DriverCompilerAdapter::compileWS(const std:
412412

413413
const ze_graph_compiler_version_info_t& compilerVersion = _compilerProperties.compilerVersion;
414414

415+
if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 3)) {
416+
OPENVINO_THROW("Minimum compiler version required for weights separation: 6.3. Found: ",
417+
compilerVersion.major,
418+
".",
419+
compilerVersion.minor);
420+
}
421+
415422
const auto maxOpsetVersion = _compilerProperties.maxOVOpsetVersionSupported;
416423
_logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);
417424

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+155
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,64 @@
55
#include "plugin.hpp"
66

77
#include <fstream>
8+
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
9+
#include <transformations/common_optimizations/batch_to_space_fusion.hpp>
10+
#include <transformations/common_optimizations/conv_mul_fusion.hpp>
11+
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
12+
#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
13+
#include <transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp>
14+
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
15+
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
16+
#include <transformations/common_optimizations/moc_transformations.hpp>
17+
#include <transformations/common_optimizations/mul_conv_fusion.hpp>
18+
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>
19+
#include <transformations/common_optimizations/mvn_fusion.hpp>
20+
#include <transformations/common_optimizations/pad_fusion.hpp>
21+
#include <transformations/common_optimizations/pull_through_reduce.hpp>
22+
#include <transformations/common_optimizations/reduce_reshape_fusion.hpp>
23+
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
24+
#include <transformations/common_optimizations/rms_fusion.hpp>
25+
#include <transformations/common_optimizations/shuffle_channels_fusion.hpp>
26+
#include <transformations/common_optimizations/space_to_batch_fusion.hpp>
27+
#include <transformations/common_optimizations/strides_optimization.hpp>
28+
#include <transformations/common_optimizations/transpose_to_reshape.hpp>
29+
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
30+
#include <transformations/control_flow/unroll_if.hpp>
31+
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
32+
#include <transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp>
33+
#include <transformations/init_node_info.hpp>
34+
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
35+
#include <transformations/op_conversions/batch_norm_decomposition.hpp>
36+
#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp>
37+
#include <transformations/op_conversions/convert_avgpool_downgrade.hpp>
38+
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
39+
#include <transformations/op_conversions/convert_convertlike.hpp>
40+
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
41+
#include <transformations/op_conversions/convert_gather_upgrade.hpp>
42+
#include <transformations/op_conversions/convert_interpolate11_downgrade.hpp>
43+
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
44+
#include <transformations/op_conversions/convert_maxpool_downgrade.hpp>
45+
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
46+
#include <transformations/op_conversions/convert_pad12_downgrade.hpp>
47+
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
48+
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
49+
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
50+
#include <transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp>
51+
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
52+
#include <transformations/op_conversions/convert_shapeof3.hpp>
53+
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
54+
#include <transformations/op_conversions/convert_softmax_upgrade.hpp>
55+
#include <transformations/op_conversions/convert_topk11_downgrade.hpp>
56+
#include <transformations/op_conversions/detection_output_downgrade.hpp>
57+
#include <transformations/op_conversions/einsum_decomposition.hpp>
58+
#include <transformations/op_conversions/gelu7_downgrade.hpp>
59+
#include <transformations/op_conversions/group_normalization_decomposition.hpp>
60+
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
61+
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
62+
#include <transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp>
63+
#include <transformations/op_conversions/softmax_decomposition.hpp>
64+
#include <transformations/rt_info/fused_names_attribute.hpp>
65+
#include <transformations/utils/utils.hpp>
866

967
#include "compiled_model.hpp"
1068
#include "compiler_adapter_factory.hpp"
@@ -185,6 +243,101 @@ void update_log_level(const std::map<std::string, std::string>& propertiesMap) {
185243
}
186244
}
187245

246+
/**
 * @brief Registers a fixed pipeline of OpenVINO transformation passes and runs it on the given model.
 *
 * All passes are registered on a single ov::pass::Manager, in the order listed below, and executed by
 * one call to Manager::run_passes() at the end of the function.
 *
 * @param model Model handed to ov::pass::Manager::run_passes(). The passes operate on this object
 *              directly; nothing is returned.
 *
 * @note NOTE(review): the pass list and its order presumably have to stay aligned with the pass list
 *       applied on the compiler side - confirm before adding, removing or reordering entries.
 */
void runOVPasses(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::InitNodeInfo>();

    // Element types passed to both MarkDequantization and KeepConstPrecision.
    const ov::element::TypeVector decompression_precisions{
        ov::element::u4,
        ov::element::i4,
        ov::element::nf4,
        ov::element::u8,
        ov::element::i8,
    };
    manager.register_pass<ov::pass::MarkDequantization>(decompression_precisions, /*fold_subtract_const=*/true);
    manager.register_pass<ov::pass::KeepConstPrecision>(decompression_precisions, /*fold_subtract_const=*/true);
    manager.register_pass<ov::pass::ConvertQuantizeDequantize>();
    manager.register_pass<ov::pass::ConstantFolding>();

    // Opset version conversions registered ahead of MOCTransformations.
    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
    manager.register_pass<ov::pass::ConvertInterpolate1ToInterpolate4>();
    manager.register_pass<ov::pass::ConvertInterpolate11ToInterpolate4>();
    manager.register_pass<ov::pass::ConvertTopK11ToTopK3>();
    manager.register_pass<ov::pass::ConvertPad12ToPad1>();
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.register_pass<ov::pass::SliceToStridedSlice>(true);
    manager.register_pass<ov::pass::MOCTransformations>(true, false);

    // Passes switched off through the manager's pass config.
    // NOTE(review): presumably these are sub-passes pulled in by MOCTransformations - confirm.
    auto pass_config = manager.get_pass_config();
    pass_config->disable<ov::pass::PadFusionConvolution>();
    pass_config->disable<ov::pass::PadFusionGroupConvolution>();
    pass_config->disable<ov::pass::MVNFusionWithConstantsInside>();
    pass_config->disable<ov::pass::PullThroughReduce>();
    pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
    pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
    pass_config->disable<ov::pass::MulFakeQuantizeFusion>();

    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();

    auto static_shape = manager.register_pass<ov::pass::GraphRewrite>();
    static_shape->add_matcher<ov::pass::ConvertNMS9ToNMSIEInternal>();
    static_shape->set_name("ov::pass::CommonStaticShape");

    auto common_fusions = manager.register_pass<ov::pass::GraphRewrite>();
    common_fusions->add_matcher<ov::pass::DepthToSpaceFusion>();
    common_fusions->add_matcher<ov::pass::ShuffleChannelsFusion>(false);
    common_fusions->add_matcher<ov::pass::SpaceToBatchFusion>();
    common_fusions->add_matcher<ov::pass::BatchToSpaceFusion>();
    common_fusions->add_matcher<ov::pass::TransposeToReshape>();
    common_fusions->add_matcher<ov::pass::RMSFusion>();
    common_fusions->set_name("ov::pass::CommonFusions");

    auto decomp = manager.register_pass<ov::pass::GraphRewrite>();
    decomp->add_matcher<ov::pass::Gelu7Downgrade>();
    decomp->add_matcher<ov::pass::BidirectionalGRUSequenceDecomposition>();
    decomp->add_matcher<ov::pass::BidirectionalRNNSequenceDecomposition>();
    decomp->add_matcher<ov::pass::ConvertBroadcastToTiles>();
    decomp->add_matcher<ov::pass::ConvertConvertLike>();
    decomp->add_matcher<ov::pass::BatchNormDecomposition>();
    decomp->add_matcher<ov::pass::EinsumDecomposition>();
    decomp->add_matcher<ov::pass::DropoutWithRandomUniformReplacer>();
    decomp->add_matcher<ov::pass::ScaledDotProductAttentionDecomposition>();
    decomp->add_matcher<ov::pass::GroupNormalizationDecomposition>();
    decomp->set_name("ov::pass::CommonDecompositions");

    manager.register_pass<ov::pass::ConstantFolding>();

    manager.register_pass<ov::pass::LinOpSequenceFusion>();
    manager.register_pass<ov::pass::UnrollIf>();

    auto conv_fusions = manager.register_pass<ov::pass::GraphRewrite>();
    conv_fusions->add_matcher<ov::pass::ConvolutionMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::GroupConvolutionMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::ConvolutionBackpropDataMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::GroupConvolutionBackpropDataMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyConvolutionFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyConvolutionBackpropDataFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionBackpropDataFusion>();
    conv_fusions->set_name("ov::pass::ConvFusions");

    manager.register_pass<ov::pass::ConstantFolding>();
    manager.register_pass<ov::pass::ConvertGather1ToGather7>();
    manager.register_pass<ov::pass::ConvertGather7ToGather8>();
    manager.register_pass<ov::pass::ConvertDeformableConv8To1>();
    manager.register_pass<ov::pass::ConvertMaxPool14ToMaxPool8>();
    manager.register_pass<ov::pass::ConvertMaxPool8ToMaxPool1>();
    manager.register_pass<ov::pass::ConvertAvgPool14ToAvgPool1>();
    // ConvertSoftMax1ToSoftMax8 is registered exactly once; a second registration after
    // StridesOptimization would only re-traverse the graph without finding anything to convert.
    manager.register_pass<ov::pass::ConvertSoftMax1ToSoftMax8>();
    manager.register_pass<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    manager.register_pass<ov::pass::ConvertShapeOf3>();
    manager.register_pass<ov::pass::StridesOptimization>();

    manager.run_passes(model);
}
340+
188341
} // namespace
189342

190343
namespace intel_npu {
@@ -1039,6 +1192,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
10391192
OPENVINO_THROW("Attempted to load a weightless compiled model, but no weights have been provided");
10401193
}
10411194

1195+
runOVPasses(originalModel);
1196+
10421197
if (!localConfig.get<BENCHMARK_INIT>()) {
10431198
const std::shared_ptr<ov::Model> modelDummy =
10441199
create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);

0 commit comments

Comments
 (0)