Skip to content

Commit 2e88aa0

Browse files
authored
[GPU] Added int32 weights support for reorder_weights kernels (openvinotoolkit#20015)
1 parent c983b46 commit 2e88aa0

File tree

11 files changed

+124
-9
lines changed

11 files changed

+124
-9
lines changed

src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ enum class reorder_mean_mode {
2020
};
2121

2222
struct WeightsReorderParams {
23-
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed, bool grouped = false)
23+
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false)
2424
: _in_layout(in_layout),
2525
_out_layout(out_layout),
2626
_transposed(transposed),

src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,7 @@ kernel_selector::data_type to_data_type(data_types dt) {
136136
case cldnn::data_types::f32:
137137
return kernel_selector::data_type::F32;
138138
default:
139-
assert(0);
140-
return kernel_selector::data_type::F16;
139+
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector data type");
141140
}
142141
}
143142

@@ -158,8 +157,7 @@ data_types from_data_type(kernel_selector::data_type dt) {
158157
case kernel_selector::data_type::F32:
159158
return cldnn::data_types::f32;
160159
default:
161-
assert(0);
162-
return cldnn::data_types::f16;
160+
OPENVINO_THROW("[GPU] Unable to convert kernel_selector data type ", kernel_selector::toString(dt), " to cldnn data type");
163161
}
164162
}
165163

@@ -175,9 +173,10 @@ kernel_selector::weights_type to_weights_type(data_types dt) {
175173
return kernel_selector::weights_type::F16;
176174
case cldnn::data_types::f32:
177175
return kernel_selector::weights_type::F32;
176+
case cldnn::data_types::i32:
177+
return kernel_selector::weights_type::INT32;
178178
default:
179-
assert(0);
180-
return kernel_selector::weights_type::F16;
179+
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector weights type");
181180
}
182181
}
183182

@@ -193,9 +192,10 @@ data_types from_weights_type(kernel_selector::weights_type dt) {
193192
return data_types::f16;
194193
case kernel_selector::weights_type::F32:
195194
return data_types::f32;
195+
case kernel_selector::weights_type::INT32:
196+
return data_types::i32;
196197
default:
197-
assert(0);
198-
return data_types::f16;
198+
OPENVINO_THROW("[GPU] Unable to convert kernel_selector weights type ", kernel_selector::toString(dt), " to cldnn data type");
199199
}
200200
}
201201

src/plugins/intel_gpu/src/kernel_selector/common_tools.h

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ inline uint32_t BytesPerElement(WeightsType wt) {
4141
return 2;
4242
case WeightsType::F32:
4343
case WeightsType::BINARY:
44+
case WeightsType::INT32:
4445
return 4;
4546
default:
4647
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");

src/plugins/intel_gpu/src/kernel_selector/common_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ enum class WeightsType {
125125
F32,
126126
INT8,
127127
UINT8,
128+
INT32
128129
};
129130

130131
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

src/plugins/intel_gpu/src/kernel_selector/jitter.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ std::string toCLType(WeightsType wType) {
123123
return "half";
124124
case WeightsType::F32:
125125
return GetTypeName<float>();
126+
case WeightsType::INT32:
127+
return GetTypeName<int32_t>();
126128
default:
127129
return "";
128130
}
@@ -1505,6 +1507,8 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma
15051507
return MakeTypeJitConstants(Datatype::UINT8, macroName);
15061508
case WeightsType::BINARY:
15071509
return MakeTypeJitConstants(Datatype::UINT32, macroName);
1510+
case WeightsType::INT32:
1511+
return MakeTypeJitConstants(Datatype::INT32, macroName);
15081512
}
15091513
assert(false || "Unreachable!");
15101514
// FIXME: Is there some builtin_unreachable available?

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ std::string toString(WeightsType wType) {
157157
case WeightsType::F32: return "F32";
158158
case WeightsType::INT8: return "INT8";
159159
case WeightsType::UINT8: return "UINT8";
160+
case WeightsType::INT32: return "INT32";
160161
default: return "";
161162
}
162163
}

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ void ParamsKey::EnableInputWeightsType(WeightsType wt) {
137137
case WeightsType::BINARY:
138138
key.inputWeightsType.val.binary = 1;
139139
break;
140+
case WeightsType::INT32:
141+
key.inputWeightsType.val.int32 = 1;
140142
default:
141143
break;
142144
}
@@ -158,6 +160,8 @@ void ParamsKey::EnableOutputWeightsType(WeightsType wt) {
158160
case WeightsType::BINARY:
159161
key.outputWeightsType.val.binary = 1;
160162
break;
163+
case WeightsType::INT32:
164+
key.outputWeightsType.val.int32 = 1;
161165
default:
162166
break;
163167
}

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ static WeightsType DataTypeToWeightsType(Datatype t) {
2626
return WeightsType::F32;
2727
case Datatype::BINARY:
2828
return WeightsType::BINARY;
29+
case Datatype::INT32:
30+
return WeightsType::INT32;
2931
default:
3032
return WeightsType::UNSUPPORTED;
3133
}

src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ ParamsKey ReorderWeightsKernel::GetSupportedKey() const {
1212
k.EnableInputWeightsType(WeightsType::INT8);
1313
k.EnableInputWeightsType(WeightsType::F16);
1414
k.EnableInputWeightsType(WeightsType::F32);
15+
k.EnableInputWeightsType(WeightsType::INT32);
1516
k.EnableOutputWeightsType(WeightsType::INT8);
1617
k.EnableOutputWeightsType(WeightsType::F16);
1718
k.EnableOutputWeightsType(WeightsType::F32);
19+
k.EnableOutputWeightsType(WeightsType::INT32);
1820
k.EnableAllInputWeightsLayout();
1921
k.EnableAllOutputWeightsLayout();
2022
k.EnableDifferentTypes();

src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_opt.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ ParamsKey ReorderWeightsOpt::GetSupportedKey() const {
1313
k.EnableInputWeightsType(WeightsType::INT8);
1414
k.EnableInputWeightsType(WeightsType::F16);
1515
k.EnableInputWeightsType(WeightsType::F32);
16+
k.EnableInputWeightsType(WeightsType::INT32);
1617
k.EnableOutputWeightsType(WeightsType::INT8);
1718
k.EnableOutputWeightsType(WeightsType::F16);
1819
k.EnableOutputWeightsType(WeightsType::F32);
20+
k.EnableOutputWeightsType(WeightsType::INT32);
1921
k.EnableInputWeightsLayout(WeightsLayout::oiyx);
2022
k.EnableInputWeightsLayout(WeightsLayout::ioyx);
2123
k.EnableInputWeightsLayout(WeightsLayout::oizyx);

src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp

+98
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,104 @@ TEST(reorder_gpu_i32, basic)
19281928
ASSERT_EQ(*(a_ptr++), val);
19291929
}
19301930

1931+
// Runs a weights reorder from an f32 bfyx layout to an i32 oiyx layout with the
// "reorder_weights" OCL implementation requested, and compares the i32 result
// element by element against a hard-coded reference.
TEST(reorder_weights_gpu_i32, reorder_weights)
1932+
{
1933+
auto& engine = get_test_engine();
1934+
1935+
layout in_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 });
1936+
layout out_layout(data_types::i32, format::oiyx, { 2, 2, 2, 2 });
1937+
// Only the two layouts are passed; any further constructor arguments are left at their defaults.
auto weights_reorder_params = std::make_shared<WeightsReorderParams>(in_layout, out_layout);
1938+
1939+
auto input = engine.allocate_memory(in_layout);
1940+
1941+
// Input mixes whole numbers with fractional values (1.5, 5.2, 0.5, -0.5).
set_values(input, {
1942+
1.f, 0.f, 5.f, 1.5f,
1943+
2.f, 0.f, 6.f, 5.2f,
1944+
3.f, 0.5f, 7.f, 12.f,
1945+
4.f, -0.5f, 8.f, 8.f
1946+
});
1947+
1948+
topology topology {
1949+
input_layout("input", in_layout),
1950+
reorder("reorder", input_info("input"), weights_reorder_params)
1951+
};
1952+
1953+
ExecutionConfig config = get_test_default_config(engine);
1954+
// Ask for the "reorder_weights" implementation on the primitive named "reorder".
ov::intel_gpu::ImplementationDesc wr_impl_desc = { format::oiyx, "reorder_weights", impl_types::ocl };
1955+
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", wr_impl_desc} }));
1956+
1957+
network network(engine, topology, config);
1958+
network.set_input_data("input", input);
1959+
1960+
auto outputs = network.execute();
1961+
ASSERT_EQ(outputs.size(), size_t(1));
1962+
ASSERT_EQ(outputs.begin()->first, "reorder");
1963+
1964+
// Reference keeps the input element order; each fractional input appears with its
// fraction dropped (1.5 -> 1, 5.2 -> 5, 0.5 -> 0, -0.5 -> 0).
std::vector<int32_t> ref_output = {
1965+
1, 0, 5, 1,
1966+
2, 0, 6, 5,
1967+
3, 0, 7, 12,
1968+
4, 0, 8, 8
1969+
};
1970+
1971+
auto output = outputs.begin()->second.get_memory();
1972+
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
1973+
1974+
// Size is checked first so the indexed comparison below stays within both buffers.
ASSERT_EQ(output_ptr.size(), ref_output.size());
1975+
for (size_t i = 0; i < ref_output.size(); ++i) {
1976+
ASSERT_EQ(output_ptr[i], ref_output[i]);
1977+
}
1978+
}
1979+
1980+
// Runs a weights reorder from an f32 bfyx layout to an i32 os_iyx_osv16 layout with
// the "reorder_weights_opt" OCL implementation requested, and compares the i32
// result element by element against a hard-coded reference.
TEST(reorder_weights_gpu_i32, reorder_weights_opt)
1981+
{
1982+
auto& engine = get_test_engine();
1983+
1984+
layout in_layout(data_types::f32, format::bfyx, { 16, 1, 2, 1 });
1985+
layout out_layout(data_types::i32, format::os_iyx_osv16, { 16, 1, 2, 1 });
1986+
// Only the two layouts are passed; any further constructor arguments are left at their defaults.
auto weights_reorder_params = std::make_shared<WeightsReorderParams>(in_layout, out_layout);
1987+
1988+
auto input = engine.allocate_memory(in_layout);
1989+
1990+
// 32 input values; mostly the element's own index, with 0.5, -1.6 and -1.0
// placed at indices 11, 21 and 24.
set_values(input, {
1991+
0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f,
1992+
8.f, 9.f, 10.f, 0.5f, 12.f, 13.f, 14.f, 15.f,
1993+
16.f, 17.f, 18.f, 19.f, 20.f, -1.6f, 22.f, 23.f,
1994+
-1.0f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f
1995+
});
1996+
1997+
topology topology {
1998+
input_layout("input", in_layout),
1999+
reorder("reorder", input_info("input"), weights_reorder_params)
2000+
};
2001+
2002+
ExecutionConfig config = get_test_default_config(engine);
2003+
// Ask for the "reorder_weights_opt" implementation on the primitive named "reorder".
ov::intel_gpu::ImplementationDesc wr_impl_desc = { format::os_iyx_osv16, "reorder_weights_opt", impl_types::ocl };
2004+
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", wr_impl_desc} }));
2005+
2006+
network network(engine, topology, config);
2007+
network.set_input_data("input", input);
2008+
2009+
auto outputs = network.execute();
2010+
ASSERT_EQ(outputs.size(), size_t(1));
2011+
ASSERT_EQ(outputs.begin()->first, "reorder");
2012+
2013+
// Reference is a permutation of the input: the 16 even-indexed input elements come
// first, followed by the 16 odd-indexed ones, with fractions dropped
// (0.5 -> 0, -1.6 -> -1).
// NOTE(review): presumably this ordering follows from the os_iyx_osv16 blocking of
// the 16 output channels - confirm against the format definition.
std::vector<int32_t> ref_output = {
2014+
0, 2, 4, 6, 8, 10, 12, 14,
2015+
16, 18, 20, 22, -1, 26, 28, 30,
2016+
1, 3, 5, 7, 9, 0, 13, 15,
2017+
17, 19, -1, 23, 25, 27, 29, 31
2018+
};
2019+
2020+
auto output = outputs.begin()->second.get_memory();
2021+
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
2022+
2023+
// Size is checked first so the indexed comparison below stays within both buffers.
ASSERT_EQ(output_ptr.size(), ref_output.size());
2024+
for (size_t i = 0; i < ref_output.size(); ++i) {
2025+
ASSERT_EQ(output_ptr[i], ref_output[i]);
2026+
}
2027+
}
2028+
19312029
TEST(reorder_gpu_i64, basic)
19322030
{
19332031
// Test for converting data types f32->i64

0 commit comments

Comments
 (0)