
Commit 1ba8005

[GPU] Disabling redundant copying of constant weights (openvinotoolkit#18949)
1 parent 845bbfc commit 1ba8005

36 files changed: +754, -153 lines

src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp (+11)

@@ -14,6 +14,7 @@ namespace pass {
 class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
 class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding;
 class TRANSFORMATIONS_API KeepConstAndDecompression;
+class TRANSFORMATIONS_API KeepConstantsPrecisionAndAddConverts;
 
 }  // namespace pass
 }  // namespace ov
@@ -47,3 +48,13 @@ class ov::pass::KeepConstAndDecompression : public MatcherPass {
     OPENVINO_RTTI("KeepConstAndDecompression", "0");
     KeepConstAndDecompression();
 };
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief Prevents Consts precision conversion and adds Convert with disabled ConstantFolding
+ */
+class ov::pass::KeepConstantsPrecisionAndAddConverts : public MatcherPass {
+public:
+    OPENVINO_RTTI("KeepConstantsPrecisionAndAddConverts", "0");
+    KeepConstantsPrecisionAndAddConverts();
+};
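The unit tests added later in this commit show how the pass is wired up; as a minimal sketch (assuming `model` is a std::shared_ptr<ov::Model>, and with a placeholder callback in place of the plugin's real one):

    // Register the pass; the pass-config callback lets a plugin skip selected Constants.
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::KeepConstantsPrecisionAndAddConverts>();
    manager.get_pass_config()->set_callback<ov::pass::KeepConstantsPrecisionAndAddConverts>(
        [](const std::shared_ptr<const ov::Node>& node) -> bool {
            return false;  // returning true would skip this Constant (see transformation_callback in the pass body)
        });
    manager.run_passes(model);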
src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp (new file, +35)

@@ -0,0 +1,35 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/node.hpp"
+#include "openvino/core/runtime_attribute.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+
+TRANSFORMATIONS_API void enable_keep_const_precision(const std::shared_ptr<Node>& node);
+
+TRANSFORMATIONS_API void disable_keep_const_precision(const std::shared_ptr<Node>& node);
+
+TRANSFORMATIONS_API bool is_keep_const_precision(const std::shared_ptr<const Node>& node);
+
+/**
+ * @ingroup ie_runtime_attr_api
+ * @brief KeepConstPrecision class represents runtime info attribute that marks a Constant
+ * as prohibited to fuse precision in ConvertPrecision
+ */
+class TRANSFORMATIONS_API KeepConstPrecision : public RuntimeAttribute {
+public:
+    OPENVINO_RTTI("keep_const_precision", "0");
+
+    KeepConstPrecision() = default;
+
+    bool is_copyable() const override {
+        return false;
+    }
+};
+
+}  // namespace ov
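The three free functions make this a simple marker API; a small usage sketch, assuming `weights` is a std::shared_ptr<ov::Node> pointing at a Constant:

    ov::enable_keep_const_precision(weights);          // stores KeepConstPrecision in the node's rt_info
    bool kept = ov::is_keep_const_precision(weights);  // now true; ConvertPrecision will leave the node alone
    ov::disable_keep_const_precision(weights);         // erases the attribute again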

src/common/transformations/include/transformations/rt_info/keep_fp16_const.hpp (-35)

This file was deleted.

src/common/transformations/src/transformations/convert_precision.cpp (+3, -3)

@@ -26,7 +26,7 @@
 #include "transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp"
 #include "transformations/rt_info/decompression.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
-#include "transformations/rt_info/keep_fp16_const.hpp"
+#include "transformations/rt_info/keep_const_precision.hpp"
 #include "transformations/utils/utils.hpp"
 
 using namespace ov;
@@ -1125,8 +1125,8 @@ std::shared_ptr<Node> convert_low_precisions_int(std::shared_ptr<opset4::Constant>
 bool fuse_type_to_constant(const std::shared_ptr<ov::Node>& node,
                            const precisions_map& precisions,
                            const std::vector<Input<Node>>& consumers) {
-    // Consts marked with disable_constant_folding should be kept in f16 until they reach the plugin
-    if (is_keep_fp16_const(node))
+    // Consts marked with is_keep_const_precision should be kept in their own precision until they reach the plugin
+    if (is_keep_const_precision(node))
         return false;
 
     auto from = node->get_element_type();
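The net effect is that a marked Constant survives ConvertPrecision with its original type; a sketch of the expected behaviour, assuming the Constant was marked beforehand:

    auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1.f, 2.f, 3.f, 4.f});
    ov::enable_keep_const_precision(weights);
    // ... run pass::ConvertPrecision with {{ov::element::f32, ov::element::f16}} over the model ...
    // fuse_type_to_constant() returns false for the marked node, so its type is unchanged:
    assert(weights->get_element_type() == ov::element::f32);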

src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp (+47, -2)

@@ -5,14 +5,16 @@
 #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
 
 #include "itt.hpp"
+#include "openvino/core/rt_info.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
 #include "openvino/op/matmul.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "transformations/rt_info/decompression.hpp"
 #include "transformations/rt_info/disable_constant_folding.hpp"
+#include "transformations/rt_info/disable_fp16_compression.hpp"
 #include "transformations/rt_info/is_shape_subgraph.hpp"
-#include "transformations/rt_info/keep_fp16_const.hpp"
+#include "transformations/rt_info/keep_const_precision.hpp"
 
 using namespace ov;
 
@@ -67,10 +69,53 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
 
         if (!is_type<ov::op::v0::Constant>(node->input_value(0).get_node_shared_ptr()))
            return false;
-        enable_keep_fp16_const(node->input_value(0).get_node_shared_ptr());
+        enable_keep_const_precision(node->input_value(0).get_node_shared_ptr());
 
         return false;
     };
     auto m = std::make_shared<pattern::Matcher>(node_pattern, matcher_name);
     register_matcher(m, callback);
 }
+
+pass::KeepConstantsPrecisionAndAddConverts::KeepConstantsPrecisionAndAddConverts() {
+    MATCHER_SCOPE(KeepConstantsPrecisionAndAddConverts);
+    auto const_pattern = pattern::wrap_type<ov::op::v0::Constant>();
+
+    matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        auto const_node = m.get_match_root();
+
+        if (transformation_callback(const_node)) {
+            return false;
+        }
+
+        enable_keep_const_precision(const_node);
+
+        const auto& constant_target_inputs = const_node->get_output_target_inputs(0);
+        const auto& next_node = constant_target_inputs.begin()->get_node()->shared_from_this();
+        if (is_type<ov::op::v0::Convert>(next_node)) {
+            disable_constant_folding(next_node);
+            if (is_decompression(next_node)) {
+                unmark_as_decompression(next_node);
+            }
+            return true;
+        }
+
+        auto convert = std::make_shared<ov::op::v0::Convert>(const_node, const_node->get_element_type());
+        convert->set_friendly_name(const_node->get_friendly_name());
+
+        std::string postfix = const_node->get_element_type() == ov::element::f32 ? "compression" : "decompression";
+        const_node->set_friendly_name(const_node->get_friendly_name() + "_postponed_" + postfix);
+
+        ov::copy_runtime_info(const_node, convert);
+        disable_constant_folding(convert);
+
+        for (const auto& target_input : constant_target_inputs) {
+            target_input.replace_source_output(convert);
+        }
+
+        return true;
+    };
+
+    auto m = std::make_shared<pass::pattern::Matcher>(const_pattern, matcher_name);
+    this->register_matcher(m, callback);
+}
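In graph terms, the new pass keeps the Constant in its original precision and postpones the actual conversion by inserting a self-typed Convert whose folding is disabled. The manual equivalent of the fall-through branch above is roughly (a sketch, not plugin code; `weights` and `matmul` are assumed nodes):

    // Before: weights(f32) --> matmul
    // After:  weights(f32, rt_info: keep_const_precision)
    //           --> convert(f32 -> f32, constant folding disabled) --> matmul
    auto convert = std::make_shared<ov::op::v0::Convert>(weights, weights->get_element_type());
    ov::disable_constant_folding(convert);  // keeps the Convert alive until the plugin decides the final precision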
src/common/transformations/src/transformations/rt_info/keep_const_precision.cpp (new file, +20)

@@ -0,0 +1,20 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/rt_info/keep_const_precision.hpp"
+
+void ov::enable_keep_const_precision(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info[KeepConstPrecision::get_type_info_static()] = KeepConstPrecision{};
+}
+
+void ov::disable_keep_const_precision(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase(KeepConstPrecision::get_type_info_static());
+}
+
+bool ov::is_keep_const_precision(const std::shared_ptr<const Node>& node) {
+    const auto& rt_info = node->get_rt_info();
+    return rt_info.count(KeepConstPrecision::get_type_info_static());
+}

src/common/transformations/src/transformations/rt_info/keep_fp16_const.cpp (-20)

This file was deleted.
New test file (+86):

@@ -0,0 +1,86 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "openvino/opsets/opset1.hpp"
+#include "transformations/convert_precision.hpp"
+#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
+#include "transformations/utils/utils.hpp"
+
+using namespace testing;
+using namespace ov;
+using namespace ov::opset1;
+using const_node_ptr = const std::shared_ptr<const Node>;
+
+TEST_F(TransformationTestsF, KeepConstantsPrecisionAndAddConvertsTestBase) {
+    {
+        auto input = std::make_shared<Parameter>(element::f32, Shape{3, 2, 2});
+        auto weights = Constant::create(element::f32, Shape{1, 2, 2}, {1});
+        auto matmul = std::make_shared<MatMul>(input, weights);
+
+        model = std::make_shared<Model>(NodeVector{matmul}, ParameterVector{input});
+
+        manager.register_pass<pass::KeepConstantsPrecisionAndAddConverts>();
+        manager.get_pass_config()->set_callback<pass::KeepConstantsPrecisionAndAddConverts>(
+            [](const_node_ptr& node) -> bool {
+                auto next_node = node->get_output_target_inputs(0).begin()->get_node();
+                if (is_type<op::v0::Convert>(next_node)) {
+                    next_node = next_node->get_output_target_inputs(0).begin()->get_node();
+                }
+                return !is_type<op::v0::MatMul>(next_node);
+            });
+
+        const precisions_map precisions = {{element::f32, element::f16}};
+        const type_to_fuse_map empty_fuse_map = {};
+        const bool keep_precision_sensitive_in_fp32_1 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions, empty_fuse_map, keep_precision_sensitive_in_fp32_1);
+    }
+    {
+        auto input = std::make_shared<Parameter>(element::f16, Shape{3, 2, 2});
+        auto weights = Constant::create(element::f32, Shape{1, 2, 2}, {1});
+        auto convert_weights = std::make_shared<Convert>(weights, element::f16);
+        auto matmul = std::make_shared<MatMul>(input, convert_weights);
+
+        model_ref = std::make_shared<Model>(NodeVector{matmul}, ParameterVector{input});
+    }
+}
+
+TEST_F(TransformationTestsF, KeepConstantsPrecisionAndAddConvertsTestWithCompressedConvert) {
+    {
+        auto input = std::make_shared<Parameter>(element::f16, Shape{3, 2, 2});
+        auto weights = Constant::create(element::f32, Shape{1, 2, 2}, {1});
+        auto convert_weights = std::make_shared<Convert>(weights, element::f16);
+        mark_as_decompression(convert_weights);
+        auto matmul = std::make_shared<MatMul>(input, convert_weights);
+
+        model = std::make_shared<Model>(NodeVector{matmul}, ParameterVector{input});
+
+        manager.register_pass<pass::KeepConstantsPrecisionAndAddConverts>();
+        manager.get_pass_config()->set_callback<pass::KeepConstantsPrecisionAndAddConverts>(
+            [](const_node_ptr& node) -> bool {
+                auto next_node = node->get_output_target_inputs(0).begin()->get_node();
+                if (is_type<op::v0::Convert>(next_node)) {
+                    next_node = next_node->get_output_target_inputs(0).begin()->get_node();
+                }
+                return !is_type<op::v0::MatMul>(next_node);
+            });
+
+        const precisions_map precisions = {{element::f32, element::f16}};
+        const type_to_fuse_map empty_fuse_map = {};
+        const bool keep_precision_sensitive_in_fp32_1 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions, empty_fuse_map, keep_precision_sensitive_in_fp32_1);
+    }
+    {
+        auto input = std::make_shared<Parameter>(element::f16, Shape{3, 2, 2});
+        auto weights = Constant::create(element::f32, Shape{1, 2, 2}, {1});
+        auto convert_weights = std::make_shared<Convert>(weights, element::f16);
+        auto matmul = std::make_shared<MatMul>(input, convert_weights);
+
+        model_ref = std::make_shared<Model>(NodeVector{matmul}, ParameterVector{input});
+    }
+}

src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp (+16, -14)

@@ -35,20 +35,22 @@ class ICompilationContext;
 struct program {
     using ptr = std::shared_ptr<program>;
     using cptr = std::shared_ptr<const program>;
-    friend class calculate_prior_boxes; // to be removed when possible
-    friend class graph_initializations; // to be removed when possible
-    friend class prepare_padding; // to be removed when possible
-    friend class propagate_constants; // to be removed when possible
-    friend class pre_replace_deconv; // to be removed when possible
-    friend class prepare_primitive_fusing; // to be removed when possible
-    friend class prepare_quantization; // to be removed when possible
-    friend class prepare_conv_eltw_fusing; // to be removed when possible
-    friend class reorder_inputs; // to be removed when possible
-    friend class remove_redundant_reorders; // to be removed when possible
-    friend class post_optimize_weights; // to be removed when possible
-    friend class program_wrapper; // this class is intended to extend the interface of program for
-                                  // the usage within tests_core_internal project only
-    friend class prepare_primitive_fusing_through; // to be removed when possible
+    friend class calculate_prior_boxes;             // to be removed when possible
+    friend class graph_initializations;             // to be removed when possible
+    friend class prepare_padding;                   // to be removed when possible
+    friend class propagate_constants;               // to be removed when possible
+    friend class pre_replace_deconv;                // to be removed when possible
+    friend class prepare_primitive_fusing;          // to be removed when possible
+    friend class prepare_quantization;              // to be removed when possible
+    friend class prepare_conv_eltw_fusing;          // to be removed when possible
+    friend class reorder_inputs;                    // to be removed when possible
+    friend class remove_redundant_reorders;         // to be removed when possible
+    friend class post_optimize_weights;             // to be removed when possible
+    friend class prepare_primitive_fusing_through;  // to be removed when possible
+    friend class reorder_transfer;                  // to be removed when possible
+    friend class fuse_constant_transposes;          // to be removed when possible
+    friend class program_wrapper;                   // this class is intended to extend the interface of program for
+                                                    // the usage within tests_core_internal project only
 public:
     struct nodes_ordering {
     public:

src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp (+14, -2)

@@ -80,6 +80,7 @@ struct format {
         bfvuwzyx,   ///< 8d tensor
         yxfb,       ///< batch first, feature and than spatials
         byxf,       ///< used in bitmaps, input from user i.e b images of RGB format
+        fbyx,
         fyxb,       ///< format not used inside clDNN, but supported in reorder as extension
         bzyxf,
         byfx,       ///< To be used when onednn gemm allows permute fusing in transformer network. Not for normal use from cldnn.
@@ -341,8 +342,9 @@ struct format {
         return (fmt == yxfb || fmt == byxf ||
                 fmt == byfx || fmt == bxfy ||
                 fmt == bfyx || fmt == fyxb ||
-                fmt == bfzyx || fmt == bfwzyx ||
-                fmt == bfuwzyx || fmt == bfvuwzyx);
+                fmt == fbyx || fmt == bfzyx ||
+                fmt == bfwzyx || fmt == bfuwzyx ||
+                fmt == bfvuwzyx);
     }
 
     static format get_default_format(size_t rank, bool is_weights = false, bool is_grouped = false);
@@ -352,6 +354,14 @@ struct format {
 
     static const std::vector<std::pair<size_t, int>> per_axis_block_size(format fmt);
 
+    static format find_format(const std::vector<uint64_t>& order,
+                              const std::vector<std::pair<size_t, int>>& block_sizes,
+                              bool is_weights = false,
+                              bool is_grouped = false,
+                              bool is_image_2d = false,
+                              bool is_winograd = false,
+                              bool is_nv12 = false);
+
     /// @brief Checks if @p format is of grouped type
     static bool is_grouped(type fmt) { return group_num(fmt) != 0; }
     /// @brief Checks if @p format is of image type
@@ -373,6 +383,8 @@ struct format {
     size_t spatial_num() const { return traits(value).spatial_num; }
     /// @brief Returns number of group dimensions.
     size_t group_num() const { return traits(value).group_num; }
+    /// @brief Returns an order of dimensions.
+    const std::vector<uint64_t>& dims_order() const { return traits(value)._order; }
     /// @brief Returns an order of dimensions in form of string.
     const std::string& order() const { return traits(value).order; }
     /// @brief Returns an internal orders of dimensions form of string.
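find_format complements per_axis_block_size and the new dims_order getter: given a dimension order and per-axis block sizes, it looks up the matching format entry. A hedged usage sketch (assuming the cldnn namespace used throughout intel_gpu, and assuming a plain {0, 1, 2, 3} order with no blocking resolves to bfyx):

    // Hypothetical lookup of a 4D planar layout with no blocked axes.
    auto fmt = cldnn::format::find_format({0, 1, 2, 3}, /*block_sizes=*/{});
    // Under the assumption above, fmt would compare equal to cldnn::format::bfyx.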
