Skip to content

Commit 81f0be8

Browse files
authored
Align ConvertPrecision with LSB-first nibble packing for u4/i4 types (#32144)
This PR fixes the nibble ordering inconsistency in 4-bit types (u4/i4) where ConvertPrecision transformation was not aligned with the current LSB-first packing standard. Problem ConvertPrecision transformation was using deprecated MSB-first nibble order (not updated after PR #20371), while Convert operation with ConstantFolding used the correct LSB-first order via element::iterator. This created inconsistencies between different code paths for the same input. Path inconsistency before fix: ``` Input: u4 constant bytes [0xAB] | v ┌──────┴──────┐ | | v v ConvertPrecision Convert → ConstantFolding (MSB-first) (LSB-first via is_lsb_packed) | | v v [10, 11] [11, 10] <-- Different results! ``` This led to incorrect values in INT4 quantized models (like Phi-3) when different optimization paths were applied. Solution Refactored `convert_low_precisions_int()` to use `Convert::evaluate()` instead of custom conversion logic. This ensures consistent LSB-first nibble packing across all conversion paths, aligning with the standard established in PR #20371. Path consistency after fix: ``` Path consistency after fix: Input: u4 constant bytes [0xAB] | v ┌──────┴──────┐ | | v v ConvertPrecision Convert → ConstantFolding (via Convert::evaluate) (LSB-first) | | v v [11, 10] [11, 10] <-- Same results! ``` ### Tickets: - CVS-161778
1 parent fa951e0 commit 81f0be8

File tree

2 files changed

+51
-121
lines changed

2 files changed

+51
-121
lines changed

src/common/transformations/src/transformations/convert_precision.cpp

Lines changed: 24 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "itt.hpp"
1111
#include "openvino/core/graph_util.hpp"
1212
#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
13+
#include "openvino/core/type/element_iterator.hpp"
1314
#include "openvino/op/ops.hpp"
1415
#include "openvino/pass/constant_folding.hpp"
1516
#include "openvino/pass/manager.hpp"
@@ -1266,102 +1267,31 @@ void convert_lp_value(const SRC& src,
12661267
std::shared_ptr<Node> convert_low_precisions_int(std::shared_ptr<ov::op::v0::Constant>& constant,
12671268
ov::element::Type to) {
12681269
// Supported integer precisions
1269-
static const precisions_set_t supported_integer_precisions = {ov::element::i4, ov::element::u4, ov::element::u1};
1270-
// Get source element type and source data
1270+
static const precisions_set_t supported_integer_precisions = {ov::element::i4,
1271+
ov::element::u4,
1272+
ov::element::u1,
1273+
ov::element::u2};
12711274
auto src_type = constant->get_element_type();
1272-
const auto* src_data = reinterpret_cast<const uint8_t*>(constant->get_data_ptr());
1273-
1274-
// We support conversion only if several elements can be represented in one instance of some
1275-
// C++ common data type without any exception, destination data type should be bigger than
1276-
// source and destination data type should be real
1277-
if (!supported_integer_precisions.count(src_type) || (src_type.size() * 8) % src_type.bitwidth() ||
1278-
(to.size() * 8) % to.bitwidth() || to.is_real() || to.bitwidth() < src_type.bitwidth())
1279-
OPENVINO_THROW("Convert low precision for " + constant->get_element_type().get_type_name() + " to " +
1280-
to.get_type_name() + " is not implemented!");
1281-
1282-
// Create a new constant operation and get destination data
1283-
auto new_constant = std::make_shared<ov::op::v0::Constant>(to, constant->get_shape());
1284-
auto* dst_data = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(new_constant->get_data_ptr()));
1285-
// Check pointers
1286-
if (src_data == nullptr || dst_data == nullptr)
1287-
OPENVINO_THROW("Can't get data pointer");
1288-
1289-
// Convert values
1290-
const auto size = shape_size(constant->get_shape());
1291-
size_t src_idx(0), dst_idx(0), dst_off(0), src_off(0);
1292-
if (src_type.bitwidth() < 8) {
1293-
src_off = 8 - src_type.bitwidth();
1294-
}
1295-
1296-
if (to.bitwidth() < 8) {
1297-
dst_off = 8 - to.bitwidth();
1298-
}
1299-
1300-
for (size_t i = 0; i < size; i++) {
1301-
// Source type at the current moment always less than 1 byte
1302-
// Select the right destination type
1303-
switch (to.size()) {
1304-
case 1:
1305-
convert_lp_value<uint8_t, uint8_t>(src_data[src_idx],
1306-
dst_data[dst_idx],
1307-
src_off,
1308-
src_type.bitwidth(),
1309-
dst_off,
1310-
to.bitwidth(),
1311-
src_type.is_signed());
1312-
break;
1313-
case 2:
1314-
convert_lp_value<uint8_t, uint16_t>(src_data[src_idx],
1315-
reinterpret_cast<uint16_t*>(dst_data)[dst_idx],
1316-
src_off,
1317-
src_type.bitwidth(),
1318-
dst_off,
1319-
to.bitwidth(),
1320-
src_type.is_signed());
1321-
break;
1322-
case 4:
1323-
convert_lp_value<uint8_t, uint32_t>(src_data[src_idx],
1324-
reinterpret_cast<uint32_t*>(dst_data)[dst_idx],
1325-
src_off,
1326-
src_type.bitwidth(),
1327-
dst_off,
1328-
to.bitwidth(),
1329-
src_type.is_signed());
1330-
break;
1331-
case 8:
1332-
convert_lp_value<uint8_t, uint64_t>(src_data[src_idx],
1333-
reinterpret_cast<uint64_t*>(dst_data)[dst_idx],
1334-
src_off,
1335-
src_type.bitwidth(),
1336-
dst_off,
1337-
to.bitwidth(),
1338-
src_type.is_signed());
1339-
break;
1340-
default:
1341-
OPENVINO_THROW("Unsupported element size!");
1342-
}
1343-
// Calculate offsets and indexes
1344-
if (src_type.bitwidth() < 8) {
1345-
if (src_off == 0) {
1346-
src_off = 8;
1347-
src_idx++;
1348-
}
1349-
src_off -= src_type.bitwidth();
1350-
} else {
1351-
src_idx++;
1352-
}
1353-
if (to.bitwidth() < 8) {
1354-
if (dst_off == 0) {
1355-
dst_off = 8;
1356-
dst_idx++;
1357-
}
1358-
dst_off -= to.bitwidth();
1359-
} else {
1360-
dst_idx++;
1361-
}
1362-
}
13631275

1364-
return new_constant;
1276+
// Validate conversion is supported
1277+
if (!supported_integer_precisions.count(src_type) || to.is_real() || to.bitwidth() < src_type.bitwidth()) {
1278+
OPENVINO_THROW("Convert low precision for ",
1279+
src_type.get_type_name(),
1280+
" to ",
1281+
to.get_type_name(),
1282+
" is not implemented!");
1283+
}
1284+
1285+
// Use Convert operator's evaluate() to perform conversion
1286+
// This ensures consistent nibble packing order (LSB-first) with element::iterator
1287+
auto outputs = ov::TensorVector{{to, constant->get_shape()}};
1288+
OPENVINO_ASSERT(ov::op::v0::Convert(constant, to).evaluate(outputs, {constant->get_tensor_view()}),
1289+
"Failed to convert constant from ",
1290+
src_type.get_type_name(),
1291+
" to ",
1292+
to.get_type_name());
1293+
1294+
return std::make_shared<ov::op::v0::Constant>(std::move(outputs[0]));
13651295
}
13661296

13671297
} // namespace

src/common/transformations/tests/utils/convert_precision.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,110 +1598,110 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_BoolToU8) {
15981598
}
15991599

16001600
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToI8) {
1601-
constant_convert_test<uint8_t, int8_t>(element::u4, element::i8, std::vector<uint8_t>{171}, {10, 11});
1601+
constant_convert_test<uint8_t, int8_t>(element::u4, element::i8, std::vector<uint8_t>{171}, {11, 10});
16021602
}
16031603

16041604
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToU8) {
1605-
constant_convert_test<uint8_t, uint8_t>(element::u4, element::u8, {171}, {10, 11});
1605+
constant_convert_test<uint8_t, uint8_t>(element::u4, element::u8, {171}, {11, 10});
16061606
}
16071607

16081608
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToI8_2) {
1609-
constant_convert_test<uint8_t, int8_t>(element::u4, element::i8, {96}, {6, 0});
1609+
constant_convert_test<uint8_t, int8_t>(element::u4, element::i8, {96}, {0, 6});
16101610
}
16111611

16121612
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToU8_96) {
1613-
constant_convert_test<uint8_t, uint8_t>(element::u4, element::u8, {96}, {6, 0});
1613+
constant_convert_test<uint8_t, uint8_t>(element::u4, element::u8, {96}, {0, 6});
16141614
}
16151615

16161616
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU8) {
1617-
constant_convert_test<uint8_t, uint8_t>(element::i4, element::u8, {96}, {6, 0});
1617+
constant_convert_test<uint8_t, uint8_t>(element::i4, element::u8, {96}, {0, 6});
16181618
}
16191619

16201620
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI8) {
1621-
constant_convert_test<uint8_t, int8_t>(element::i4, element::i8, {96}, {6, 0});
1621+
constant_convert_test<uint8_t, int8_t>(element::i4, element::i8, {96}, {0, 6});
16221622
}
16231623

16241624
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU8_neg) {
1625-
constant_convert_test<uint8_t, uint8_t>(element::i4, element::u8, {171}, {250, 251});
1625+
constant_convert_test<uint8_t, uint8_t>(element::i4, element::u8, {171}, {251, 250});
16261626
}
16271627

16281628
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI8_neg) {
1629-
constant_convert_test<uint8_t, int8_t>(element::i4, element::i8, {171}, {-6, -5});
1629+
constant_convert_test<uint8_t, int8_t>(element::i4, element::i8, {171}, {-5, -6});
16301630
}
16311631

16321632
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToI32) {
1633-
constant_convert_test<uint8_t, int32_t>(element::u4, element::i32, {171}, {10, 11});
1633+
constant_convert_test<uint8_t, int32_t>(element::u4, element::i32, {171}, {11, 10});
16341634
}
16351635

16361636
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToU32) {
1637-
constant_convert_test<uint8_t, uint32_t>(element::u4, element::u32, {171}, {10, 11});
1637+
constant_convert_test<uint8_t, uint32_t>(element::u4, element::u32, {171}, {11, 10});
16381638
}
16391639

16401640
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU32) {
1641-
constant_convert_test<uint8_t, uint32_t>(element::i4, element::u32, {96}, {6, 0});
1641+
constant_convert_test<uint8_t, uint32_t>(element::i4, element::u32, {96}, {0, 6});
16421642
}
16431643

16441644
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI32) {
1645-
constant_convert_test<uint8_t, int32_t>(element::i4, element::i32, {96}, {6, 0});
1645+
constant_convert_test<uint8_t, int32_t>(element::i4, element::i32, {96}, {0, 6});
16461646
}
16471647

16481648
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU32_neg) {
1649-
constant_convert_test<uint8_t, uint32_t>(element::i4, element::u32, {171}, {4294967290, 4294967291});
1649+
constant_convert_test<uint8_t, uint32_t>(element::i4, element::u32, {171}, {4294967291, 4294967290});
16501650
}
16511651

16521652
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI32_neg) {
1653-
constant_convert_test<uint8_t, int32_t>(element::i4, element::i32, {171}, {-6, -5});
1653+
constant_convert_test<uint8_t, int32_t>(element::i4, element::i32, {171}, {-5, -6});
16541654
}
16551655

16561656
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToI16) {
1657-
constant_convert_test<uint8_t, int16_t>(element::u4, element::i16, {171}, {10, 11});
1657+
constant_convert_test<uint8_t, int16_t>(element::u4, element::i16, {171}, {11, 10});
16581658
}
16591659

16601660
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToU16) {
1661-
constant_convert_test<uint8_t, uint16_t>(element::u4, element::u16, {171}, {10, 11});
1661+
constant_convert_test<uint8_t, uint16_t>(element::u4, element::u16, {171}, {11, 10});
16621662
}
16631663

16641664
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU16) {
1665-
constant_convert_test<uint8_t, uint16_t>(element::i4, element::u16, {96}, {6, 0});
1665+
constant_convert_test<uint8_t, uint16_t>(element::i4, element::u16, {96}, {0, 6});
16661666
}
16671667

16681668
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI16) {
1669-
constant_convert_test<uint8_t, int16_t>(element::i4, element::i16, {96}, {6, 0});
1669+
constant_convert_test<uint8_t, int16_t>(element::i4, element::i16, {96}, {0, 6});
16701670
}
16711671

16721672
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU16_neg) {
1673-
constant_convert_test<uint8_t, uint16_t>(element::i4, element::u16, {171}, {65530, 65531});
1673+
constant_convert_test<uint8_t, uint16_t>(element::i4, element::u16, {171}, {65531, 65530});
16741674
}
16751675

16761676
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI16_neg) {
1677-
constant_convert_test<uint8_t, int16_t>(element::i4, element::i16, {171}, {-6, -5});
1677+
constant_convert_test<uint8_t, int16_t>(element::i4, element::i16, {171}, {-5, -6});
16781678
}
16791679

16801680
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToI64) {
1681-
constant_convert_test<uint8_t, int64_t>(element::u4, element::i64, {171}, {10, 11});
1681+
constant_convert_test<uint8_t, int64_t>(element::u4, element::i64, {171}, {11, 10});
16821682
}
16831683

16841684
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U4ToU64) {
1685-
constant_convert_test<uint8_t, int64_t>(element::u4, element::u64, {171}, {10, 11});
1685+
constant_convert_test<uint8_t, int64_t>(element::u4, element::u64, {171}, {11, 10});
16861686
}
16871687

16881688
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU64) {
1689-
constant_convert_test<uint8_t, uint64_t>(element::i4, element::u64, {96}, {6, 0});
1689+
constant_convert_test<uint8_t, uint64_t>(element::i4, element::u64, {96}, {0, 6});
16901690
}
16911691

16921692
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI64) {
1693-
constant_convert_test<uint8_t, int64_t>(element::i4, element::i64, {96}, {6, 0});
1693+
constant_convert_test<uint8_t, int64_t>(element::i4, element::i64, {96}, {0, 6});
16941694
}
16951695

16961696
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToU64_neg) {
16971697
constant_convert_test<uint8_t, uint64_t>(element::i4,
16981698
element::u64,
16991699
{171},
1700-
{18446744073709551610u, 18446744073709551611u});
1700+
{18446744073709551611u, 18446744073709551610u});
17011701
}
17021702

17031703
TEST(TransformationTests, ConvertPrecision_ConstantConversion_I4ToI64_neg) {
1704-
constant_convert_test<uint8_t, int64_t>(element::i4, element::i64, {171}, {-6, -5});
1704+
constant_convert_test<uint8_t, int64_t>(element::i4, element::i64, {171}, {-5, -6});
17051705
}
17061706

17071707
TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU8) {
@@ -1712,7 +1712,7 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) {
17121712
constant_convert_test<uint8_t, uint8_t>(element::u1,
17131713
element::u4,
17141714
std::vector<uint8_t>{171},
1715-
{0, 1, 0, 1, 0, 1, 1, 1});
1715+
{1, 0, 1, 0, 1, 0, 1, 1});
17161716
}
17171717

17181718
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) {

0 commit comments

Comments
 (0)