|
5 | 5 | #include "plugin.hpp"
|
6 | 6 |
|
7 | 7 | #include <fstream>
|
| 8 | +#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp> |
| 9 | +#include <transformations/common_optimizations/batch_to_space_fusion.hpp> |
| 10 | +#include <transformations/common_optimizations/conv_mul_fusion.hpp> |
| 11 | +#include <transformations/common_optimizations/convert_quantize_dequantize.hpp> |
| 12 | +#include <transformations/common_optimizations/depth_to_space_fusion.hpp> |
| 13 | +#include <transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp> |
| 14 | +#include <transformations/common_optimizations/fq_mul_fusion.hpp> |
| 15 | +#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp> |
| 16 | +#include <transformations/common_optimizations/moc_transformations.hpp> |
| 17 | +#include <transformations/common_optimizations/mul_conv_fusion.hpp> |
| 18 | +#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp> |
| 19 | +#include <transformations/common_optimizations/mvn_fusion.hpp> |
| 20 | +#include <transformations/common_optimizations/pad_fusion.hpp> |
| 21 | +#include <transformations/common_optimizations/pull_through_reduce.hpp> |
| 22 | +#include <transformations/common_optimizations/reduce_reshape_fusion.hpp> |
| 23 | +#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp> |
| 24 | +#include <transformations/common_optimizations/rms_fusion.hpp> |
| 25 | +#include <transformations/common_optimizations/shuffle_channels_fusion.hpp> |
| 26 | +#include <transformations/common_optimizations/space_to_batch_fusion.hpp> |
| 27 | +#include <transformations/common_optimizations/strides_optimization.hpp> |
| 28 | +#include <transformations/common_optimizations/transpose_to_reshape.hpp> |
| 29 | +#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp> |
| 30 | +#include <transformations/control_flow/unroll_if.hpp> |
| 31 | +#include <transformations/control_flow/unroll_tensor_iterator.hpp> |
| 32 | +#include <transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp> |
| 33 | +#include <transformations/init_node_info.hpp> |
| 34 | +#include <transformations/low_precision/mark_dequantization_subgraph.hpp> |
| 35 | +#include <transformations/op_conversions/batch_norm_decomposition.hpp> |
| 36 | +#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp> |
| 37 | +#include <transformations/op_conversions/convert_avgpool_downgrade.hpp> |
| 38 | +#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp> |
| 39 | +#include <transformations/op_conversions/convert_convertlike.hpp> |
| 40 | +#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp> |
| 41 | +#include <transformations/op_conversions/convert_gather_upgrade.hpp> |
| 42 | +#include <transformations/op_conversions/convert_interpolate11_downgrade.hpp> |
| 43 | +#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp> |
| 44 | +#include <transformations/op_conversions/convert_maxpool_downgrade.hpp> |
| 45 | +#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp> |
| 46 | +#include <transformations/op_conversions/convert_pad12_downgrade.hpp> |
| 47 | +#include <transformations/op_conversions/convert_pad_to_group_conv.hpp> |
| 48 | +#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp> |
| 49 | +#include <transformations/op_conversions/convert_reduce_to_pooling.hpp> |
| 50 | +#include <transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp> |
| 51 | +#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp> |
| 52 | +#include <transformations/op_conversions/convert_shapeof3.hpp> |
| 53 | +#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp> |
| 54 | +#include <transformations/op_conversions/convert_softmax_upgrade.hpp> |
| 55 | +#include <transformations/op_conversions/convert_topk11_downgrade.hpp> |
| 56 | +#include <transformations/op_conversions/detection_output_downgrade.hpp> |
| 57 | +#include <transformations/op_conversions/einsum_decomposition.hpp> |
| 58 | +#include <transformations/op_conversions/gelu7_downgrade.hpp> |
| 59 | +#include <transformations/op_conversions/group_normalization_decomposition.hpp> |
| 60 | +#include <transformations/op_conversions/log_softmax_decomposition.hpp> |
| 61 | +#include <transformations/op_conversions/normalize_l2_decomposition.hpp> |
| 62 | +#include <transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp> |
| 63 | +#include <transformations/op_conversions/softmax_decomposition.hpp> |
| 64 | +#include <transformations/rt_info/fused_names_attribute.hpp> |
| 65 | +#include <transformations/utils/utils.hpp> |
8 | 66 |
|
9 | 67 | #include "compiled_model.hpp"
|
10 | 68 | #include "compiler_adapter_factory.hpp"
|
@@ -185,6 +243,101 @@ void update_log_level(const std::map<std::string, std::string>& propertiesMap) {
|
185 | 243 | }
|
186 | 244 | }
|
187 | 245 |
|
| 246 | +void runOVPasses(const std::shared_ptr<ov::Model>& model) { |
| 247 | + ov::pass::Manager manager; |
| 248 | + manager.register_pass<ov::pass::InitNodeInfo>(); |
| 249 | + ov::element::TypeVector decompression_precisions{ |
| 250 | + ov::element::u4, |
| 251 | + ov::element::i4, |
| 252 | + ov::element::nf4, |
| 253 | + ov::element::u8, |
| 254 | + ov::element::i8, |
| 255 | + }; |
| 256 | + manager.register_pass<ov::pass::MarkDequantization>(decompression_precisions, /*fold_subtract_const=*/true); |
| 257 | + manager.register_pass<ov::pass::KeepConstPrecision>(decompression_precisions, /*fold_subtract_const=*/true); |
| 258 | + manager.register_pass<ov::pass::ConvertQuantizeDequantize>(); |
| 259 | + manager.register_pass<ov::pass::ConstantFolding>(); |
| 260 | + manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>(); |
| 261 | + manager.register_pass<ov::pass::ConvertInterpolate1ToInterpolate4>(); |
| 262 | + manager.register_pass<ov::pass::ConvertInterpolate11ToInterpolate4>(); |
| 263 | + manager.register_pass<ov::pass::ConvertTopK11ToTopK3>(); |
| 264 | + manager.register_pass<ov::pass::ConvertPad12ToPad1>(); |
| 265 | + manager.register_pass<ov::pass::ConstantFolding>(); |
| 266 | + manager.register_pass<ov::pass::SliceToStridedSlice>(true); |
| 267 | + manager.register_pass<ov::pass::MOCTransformations>(true, false); |
| 268 | + |
| 269 | + auto pass_config = manager.get_pass_config(); |
| 270 | + pass_config->disable<ov::pass::PadFusionConvolution>(); |
| 271 | + pass_config->disable<ov::pass::PadFusionGroupConvolution>(); |
| 272 | + pass_config->disable<ov::pass::MVNFusionWithConstantsInside>(); |
| 273 | + pass_config->disable<ov::pass::PullThroughReduce>(); |
| 274 | + pass_config->disable<ov::pass::AddFakeQuantizeFusion>(); |
| 275 | + pass_config->disable<ov::pass::FakeQuantizeMulFusion>(); |
| 276 | + pass_config->disable<ov::pass::MulFakeQuantizeFusion>(); |
| 277 | + |
| 278 | + manager.register_pass<ov::pass::ConvertNMS1ToNMS9>(); |
| 279 | + manager.register_pass<ov::pass::ConvertNMS3ToNMS9>(); |
| 280 | + manager.register_pass<ov::pass::ConvertNMS4ToNMS9>(); |
| 281 | + manager.register_pass<ov::pass::ConvertNMS5ToNMS9>(); |
| 282 | + |
| 283 | + auto static_shape = manager.register_pass<ov::pass::GraphRewrite>(); |
| 284 | + static_shape->add_matcher<ov::pass::ConvertNMS9ToNMSIEInternal>(); |
| 285 | + static_shape->set_name("ov::pass::CommonStaticShape"); |
| 286 | + |
| 287 | + auto common_fusions = manager.register_pass<ov::pass::GraphRewrite>(); |
| 288 | + common_fusions->add_matcher<ov::pass::DepthToSpaceFusion>(); |
| 289 | + common_fusions->add_matcher<ov::pass::ShuffleChannelsFusion>(false); |
| 290 | + common_fusions->add_matcher<ov::pass::SpaceToBatchFusion>(); |
| 291 | + common_fusions->add_matcher<ov::pass::BatchToSpaceFusion>(); |
| 292 | + common_fusions->add_matcher<ov::pass::TransposeToReshape>(); |
| 293 | + common_fusions->add_matcher<ov::pass::RMSFusion>(); |
| 294 | + common_fusions->set_name("ov::pass::CommonFusions"); |
| 295 | + |
| 296 | + auto decomp = manager.register_pass<ov::pass::GraphRewrite>(); |
| 297 | + decomp->add_matcher<ov::pass::Gelu7Downgrade>(); |
| 298 | + decomp->add_matcher<ov::pass::BidirectionalGRUSequenceDecomposition>(); |
| 299 | + decomp->add_matcher<ov::pass::BidirectionalRNNSequenceDecomposition>(); |
| 300 | + decomp->add_matcher<ov::pass::ConvertBroadcastToTiles>(); |
| 301 | + decomp->add_matcher<ov::pass::ConvertConvertLike>(); |
| 302 | + decomp->add_matcher<ov::pass::BatchNormDecomposition>(); |
| 303 | + decomp->add_matcher<ov::pass::EinsumDecomposition>(); |
| 304 | + decomp->add_matcher<ov::pass::DropoutWithRandomUniformReplacer>(); |
| 305 | + decomp->add_matcher<ov::pass::ScaledDotProductAttentionDecomposition>(); |
| 306 | + decomp->add_matcher<ov::pass::GroupNormalizationDecomposition>(); |
| 307 | + decomp->set_name("ov::pass::CommonDecompositions"); |
| 308 | + |
| 309 | + manager.register_pass<ov::pass::ConstantFolding>(); |
| 310 | + |
| 311 | + manager.register_pass<ov::pass::LinOpSequenceFusion>(); |
| 312 | + manager.register_pass<ov::pass::UnrollIf>(); |
| 313 | + |
| 314 | + auto conv_fusions = manager.register_pass<ov::pass::GraphRewrite>(); |
| 315 | + conv_fusions->add_matcher<ov::pass::ConvolutionMultiplyFusion>(); |
| 316 | + conv_fusions->add_matcher<ov::pass::GroupConvolutionMultiplyFusion>(); |
| 317 | + conv_fusions->add_matcher<ov::pass::ConvolutionBackpropDataMultiplyFusion>(); |
| 318 | + conv_fusions->add_matcher<ov::pass::GroupConvolutionBackpropDataMultiplyFusion>(); |
| 319 | + conv_fusions->add_matcher<ov::pass::MultiplyConvolutionFusion>(); |
| 320 | + conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionFusion>(); |
| 321 | + conv_fusions->add_matcher<ov::pass::MultiplyConvolutionBackpropDataFusion>(); |
| 322 | + conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionBackpropDataFusion>(); |
| 323 | + conv_fusions->set_name("ov::pass::ConvFusions"); |
| 324 | + |
| 325 | + manager.register_pass<ov::pass::ConstantFolding>(); |
| 326 | + manager.register_pass<ov::pass::ConvertGather1ToGather7>(); |
| 327 | + manager.register_pass<ov::pass::ConvertGather7ToGather8>(); |
| 328 | + manager.register_pass<ov::pass::ConvertDeformableConv8To1>(); |
| 329 | + manager.register_pass<ov::pass::ConvertMaxPool14ToMaxPool8>(); |
| 330 | + manager.register_pass<ov::pass::ConvertMaxPool8ToMaxPool1>(); |
| 331 | + manager.register_pass<ov::pass::ConvertAvgPool14ToAvgPool1>(); |
| 332 | + manager.register_pass<ov::pass::ConvertSoftMax1ToSoftMax8>(); |
| 333 | + manager.register_pass<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>(); |
| 334 | + manager.register_pass<ov::pass::ConvertShapeOf3>(); |
| 335 | + manager.register_pass<ov::pass::StridesOptimization>(); |
| 336 | + manager.register_pass<ov::pass::ConvertSoftMax1ToSoftMax8>(); |
| 337 | + |
| 338 | + manager.run_passes(model); |
| 339 | +} |
| 340 | + |
188 | 341 | } // namespace
|
189 | 342 |
|
190 | 343 | namespace intel_npu {
|
@@ -1039,6 +1192,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
|
1039 | 1192 | OPENVINO_THROW("Attempted to load a weightless compiled model, but no weights have been provided");
|
1040 | 1193 | }
|
1041 | 1194 |
|
| 1195 | + runOVPasses(originalModel); |
| 1196 | + |
1042 | 1197 | if (!localConfig.get<BENCHMARK_INIT>()) {
|
1043 | 1198 | const std::shared_ptr<ov::Model> modelDummy =
|
1044 | 1199 | create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);
|
|
0 commit comments