Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Col2im primitive #29529

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -269,12 +269,14 @@ REGISTER_FACTORY(v13, BitwiseOr);
REGISTER_FACTORY(v13, BitwiseXor);
REGISTER_FACTORY(v13, FakeConvert);


// ------------------------------ Supported v15 ops ----------------------------- //
REGISTER_FACTORY(v15, ROIAlignRotated);
REGISTER_FACTORY(v15, BitwiseRightShift);
REGISTER_FACTORY(v15, BitwiseLeftShift);
REGISTER_FACTORY(v15, SearchSorted);
REGISTER_FACTORY(v15, STFT);
REGISTER_FACTORY(v15, Col2Im);

// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
Expand Down
104 changes: 104 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/col_to_im.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "primitive.hpp"

namespace cldnn {

/// @brief Performs the Col2Im-15 operation: rearranges column-shaped blocks back into an image tensor.
/// @details Inverse of Im2Col: each column of the input is treated as a flattened sliding block that is
/// accumulated back into the spatial output described by @c output_shape, honoring the kernel size,
/// stride, dilation and padding attributes.
struct col_to_im : public primitive_base<col_to_im> {
    CLDNN_DECLARE_PRIMITIVE(col_to_im)

    /// @brief Default constructor required by the serialization machinery.
    col_to_im() : primitive_base("", {}) {}

    /// @brief Constructs col_to_im primitive.
    /// @param id This primitive id.
    /// @param input Input data primitive id (batched column tensor).
    /// @param output_size Primitive id of the 1D input holding the output spatial size (height, width).
    /// @param kernel_size Primitive id of the 1D input holding the sliding block size.
    /// @param stride Defines shift in input buffer.
    /// @param dilation Defines gaps in the input.
    /// @param padding_begin Defines a padding added to input image on left (x axis) and top (y axis).
    /// @param padding_end Defines a padding added to input image on right (x axis) and bottom (y axis).
    /// @param output_shape Static copy of the output spatial size, used by shape inference.
    /// @param kernel_shape Static copy of the sliding block size, used by shape inference.
    col_to_im(const primitive_id& id,
              const input_info& input,
              const input_info& output_size,
              const input_info& kernel_size,
              ov::Strides stride,
              ov::Strides dilation,
              ov::CoordinateDiff padding_begin,
              ov::CoordinateDiff padding_end,
              ov::Shape output_shape,
              ov::Shape kernel_shape)
        : primitive_base(id, {input, output_size, kernel_size})
        , stride(stride)
        , dilation(dilation)
        , padding_begin(padding_begin)
        , padding_end(padding_end)
        , output_shape(output_shape)
        , kernel_shape(kernel_shape) {}

    /// @brief Defines shift in input buffer.
    ov::Strides stride;
    /// @brief Defines gaps in the input.
    ov::Strides dilation;
    /// @brief Defines a padding added to input image on left (x axis) and top (y axis).
    ov::CoordinateDiff padding_begin;
    /// @brief Defines a padding added to input image on right (x axis) and bottom (y axis).
    ov::CoordinateDiff padding_end;
    /// @brief Output spatial size mirrored from the output_size input.
    ov::Shape output_shape;
    /// @brief Sliding block size mirrored from the kernel_size input.
    ov::Shape kernel_shape;

    size_t hash() const override {
        size_t seed = primitive::hash();
        seed = hash_range(seed, padding_end.begin(), padding_end.end());
        seed = hash_range(seed, padding_begin.begin(), padding_begin.end());
        seed = hash_range(seed, dilation.begin(), dilation.end());
        seed = hash_range(seed, stride.begin(), stride.end());
        seed = hash_range(seed, output_shape.begin(), output_shape.end());
        seed = hash_range(seed, kernel_shape.begin(), kernel_shape.end());
        return seed;
    }

    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;

        // Bind by reference: copying the whole primitive (several vectors) just to compare is wasteful.
        const auto& rhs_casted = downcast<const col_to_im>(rhs);

#define cmp_fields(name) name == rhs_casted.name
        return cmp_fields(stride) &&
               cmp_fields(dilation) &&
               cmp_fields(padding_begin) &&
               cmp_fields(padding_end) &&
               cmp_fields(output_shape) &&
               cmp_fields(kernel_shape);
#undef cmp_fields
    }

    void save(BinaryOutputBuffer& ob) const override {
        primitive_base<col_to_im>::save(ob);
        ob << stride;
        ob << dilation;
        ob << padding_begin;
        ob << padding_end;
        ob << output_shape;
        ob << kernel_shape;
    }

    void load(BinaryInputBuffer& ib) override {
        primitive_base<col_to_im>::load(ib);
        ib >> stride;
        ib >> dilation;
        ib >> padding_begin;
        ib >> padding_end;
        ib >> output_shape;
        ib >> kernel_shape;
    }
};
} // namespace cldnn
101 changes: 101 additions & 0 deletions src/plugins/intel_gpu/src/graph/col_to_im.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "col_to_im_inst.h"
#include "col2im_shape_inference.hpp"

#include "primitive_type_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>

namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(col_to_im)

layout col_to_im_inst::calc_output_layout(col_to_im_node const& node, kernel_impl_params const& impl_param) {
    auto input_layout = impl_param.get_input_layout();
    auto input_format = input_layout.format;

    // TODO: compute the real Col2Im output size here. Until that is implemented this static-shape
    // path simply forwards the input dimensions; the actual shape logic lives in calc_output_layouts().
    // (Removed the empty depth_to_space-derived branches that only contained commented-out code.)
    auto out_size = input_layout.get_tensor();

    // Fused post-ops may change the output element type (e.g. quantize/activation reordering).
    if (impl_param.has_fused_primitives()) {
        input_layout.data_type = impl_param.get_output_element_type();
    }

    return layout{input_layout.data_type, input_format, out_size};
}

// Dynamic-shape output layout calculation for Col2Im.
// Runs the core v15::Col2Im shape inference on the three input shapes, then patches the spatial and
// channel dimensions from the attributes cached on the primitive (see XXX note below).
template<typename ShapeType>
std::vector<layout> col_to_im_inst::calc_output_layouts(col_to_im_node const& node, kernel_impl_params const& impl_param) {
    auto desc = impl_param.typed_desc<col_to_im>();
    auto input_layout = impl_param.get_input_layout(0);
    // Output element type follows the input unless explicitly overridden on the primitive.
    auto output_type = desc->output_data_types[0].value_or(input_layout.data_type);
    auto output_format = input_layout.format;

    // Stateless op instance used only to drive the core shape inference routine.
    ov::op::v15::Col2Im op;

    std::vector<ShapeType> input_shapes = {
        input_layout.get<ShapeType>(),                 // data (column tensor)
        impl_param.get_input_layout(1).get<ShapeType>(),  // output_size input
        impl_param.get_input_layout(2).get<ShapeType>(),  // kernel_size input
    };
    std::vector<ShapeType> output_shapes = ov::op::v15::shape_infer(&op, input_shapes);

    // XXX: quick and dirty implementation of output shape inference. It should have been fed into shape_infer function
    // (via a tensor accessor for the output_size/kernel_size inputs). Without constant input data the core
    // routine leaves these dims dynamic, so they are patched from the primitive's static attributes instead.
    // NOTE(review): negative indices below rely on end-relative indexing of the shape type — confirm
    // ov::PartialShape::operator[] supports this for all code paths.
    output_shapes[0][-1] = node.get_primitive()->output_shape[1];
    output_shapes[0][-2] = node.get_primitive()->output_shape[0];
    // Channel count C = (C * prod(kernel)) / prod(kernel): the column input packs C*prod(kernel) rows per block.
    size_t prod = 1;
    for (auto t: node.get_primitive()->kernel_shape) {
        prod *= t;
    }
    auto C = input_shapes[0][-2] / prod;
    output_shapes[0][-3] = C;

    return { layout{output_shapes[0], output_type, output_format} };
}

template std::vector<layout> col_to_im_inst::calc_output_layouts<ov::PartialShape>(col_to_im_node const& node, const kernel_impl_params& impl_param);

// Renders the primitive's attributes as a JSON description for debug/log output.
std::string col_to_im_inst::to_string(col_to_im_node const& node) {
    const auto desc = node.get_primitive();
    auto node_info = node.desc_to_json();

    // Collect the Col2Im attributes into their own JSON section.
    json_composite col_to_im_info;
    col_to_im_info.add("input id", node.input().id());
    col_to_im_info.add("stride", cldnn::to_string(desc->stride));
    col_to_im_info.add("dilation", cldnn::to_string(desc->dilation));
    col_to_im_info.add("padding begin", cldnn::to_string(desc->padding_begin));
    col_to_im_info.add("padding end", cldnn::to_string(desc->padding_end));
    node_info->add("col_to_im info", col_to_im_info);

    std::stringstream description;
    node_info->dump(description);
    return description.str();
}

// Instance constructor: no state beyond the base primitive instance is required.
col_to_im_inst::typed_primitive_inst(network& network, col_to_im_node const& node)
    : parent(network, node) {}

} // namespace cldnn
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@ void prepare_primitive_fusing::fuse_bias(program &p) {
if (replace_candidate.is_type<convolution>()) {
auto& conv = replace_candidate.as<convolution>();
auto desc = conv.get_primitive();
// XXX: deformable convolution does not support bias fusing at this moment. It is just not tested and deformable_mode value is not properly handled below.
if (desc->deformable_mode)
continue;
primitive_id biases = bias_name;

// If the primitive has biases, then we try to combine the values, or do nothing and keep as fused sum.
Expand Down
75 changes: 75 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/col_to_im.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "primitive_base.hpp"

#include "col_to_im_inst.h"
#include "col_to_im/col_to_im_kernel_selector.h"
#include "col_to_im/col_to_im_kernel_ref.h"

#include "intel_gpu/plugin/common_utils.hpp"

namespace cldnn {
namespace ocl {
// OCL implementation wrapper for the col_to_im primitive: maps primitive attributes onto
// kernel-selector parameters and registers itself with the implementation map.
struct col_to_im_impl : typed_primitive_impl_ocl<col_to_im> {
    using parent = typed_primitive_impl_ocl<col_to_im>;
    using parent::parent;
    using kernel_selector_t = kernel_selector::col_to_im_kernel_selector;
    using kernel_params_t = kernel_selector::col_to_im_params;

    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::col_to_im_impl)

    std::unique_ptr<primitive_impl> clone() const override {
        return make_deep_copy<col_to_im_impl, kernel_params_t>(*this);
    }

    // Translates the cldnn primitive attributes into kernel-selector parameters.
    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
        const auto& primitive = impl_param.typed_desc<col_to_im>();
        auto col2im_params = get_default_params<kernel_selector::col_to_im_params>(impl_param);

        // Stride / dilation attributes, expanded to xyz with a default of 1 for missing axes.
        uint32_t stride_x, stride_y, stride_z;
        uint32_t dilation_x, dilation_y, dilation_z;
        std::tie(stride_x, stride_y, stride_z) = ov::intel_gpu::get_xyz<ov::Strides, uint32_t>(primitive->stride, 1);
        col2im_params.stride = {stride_x, stride_y, stride_z};
        std::tie(dilation_x, dilation_y, dilation_z) = ov::intel_gpu::get_xyz<ov::Strides, uint32_t>(primitive->dilation, 1);
        col2im_params.dilation = {dilation_x, dilation_y, dilation_z};

        // padding begin & end, expanded to xyz with a default of 0 for missing axes.
        uint32_t pad_begin_x, pad_begin_y, pad_begin_z;
        std::tie(pad_begin_x, pad_begin_y, pad_begin_z) = ov::intel_gpu::get_xyz<ov::CoordinateDiff, uint32_t>(primitive->padding_begin, 0);
        col2im_params.padding_begin = {pad_begin_x, pad_begin_y, pad_begin_z};
        uint32_t pad_end_x, pad_end_y, pad_end_z;
        std::tie(pad_end_x, pad_end_y, pad_end_z) = ov::intel_gpu::get_xyz<ov::CoordinateDiff, uint32_t>(primitive->padding_end, 0);
        col2im_params.padding_end = {pad_end_x, pad_end_y, pad_end_z};

        // Col2Im-15 implementation : required
        // output size is 1D tensor of two positive integer numbers (height and width)
        // NOTE(review): indexing [0]/[1] below is unchecked — assumes output_shape/kernel_shape
        // always carry exactly two elements; confirm this is validated upstream (core op validation).
        std::vector<uint32_t> output_size(primitive->output_shape.begin(), primitive->output_shape.end());
        std::vector<uint32_t> kernel_size(primitive->kernel_shape.begin(), primitive->kernel_shape.end());
        col2im_params.output_size = {output_size[0], output_size[1], (uint32_t)1};
        col2im_params.kernel_size = {kernel_size[0], kernel_size[1], (uint32_t)1};

        return col2im_params;
    }
};

namespace detail {

// Registers the OCL col_to_im implementation for every supported type/format pair.
attach_col_to_im_impl::attach_col_to_im_impl() {
    const std::vector<data_types> supported_types{data_types::f16};
    const std::vector<format::type> supported_formats{format::bfyx};
    implementation_map<col_to_im>::add(impl_types::ocl,
                                       typed_primitive_impl_ocl<col_to_im>::create<col_to_im_impl>,
                                       supported_types,
                                       supported_formats);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::col_to_im_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::col_to_im)
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ void register_implementations() {
REGISTER_OCL(border);
REGISTER_OCL(broadcast);
REGISTER_OCL(bucketize);
REGISTER_OCL(col_to_im);
REGISTER_OCL(concatenation);
REGISTER_OCL(crop);
REGISTER_OCL(custom_gpu_primitive);
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "intel_gpu/primitives/border.hpp"
#include "intel_gpu/primitives/broadcast.hpp"
#include "intel_gpu/primitives/bucketize.hpp"
#include "intel_gpu/primitives/col_to_im.hpp"
#include "intel_gpu/primitives/concatenation.hpp"
#include "intel_gpu/primitives/convert_color.hpp"
#include "intel_gpu/primitives/crop.hpp"
Expand Down Expand Up @@ -87,6 +88,7 @@ REGISTER_OCL(batch_to_space);
REGISTER_OCL(border);
REGISTER_OCL(broadcast);
REGISTER_OCL(bucketize);
REGISTER_OCL(col_to_im);
REGISTER_OCL(concatenation);
REGISTER_OCL(crop);
REGISTER_OCL(custom_gpu_primitive);
Expand Down
45 changes: 45 additions & 0 deletions src/plugins/intel_gpu/src/graph/include/col_to_im_inst.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "intel_gpu/primitives/col_to_im.hpp"
#include "primitive_inst.h"

#include <string>
#include <memory>

namespace cldnn {
// Program-node specialization for col_to_im: exposes dependency access and fusing/shape-infer hooks.
template <>
struct typed_program_node<col_to_im> : public typed_program_node_base<col_to_im> {
    using parent = typed_program_node_base<col_to_im>;

public:
    using parent::parent;

    // Dependency accessor; index 0 is the column data input.
    program_node& input(size_t index = 0) const { return get_dependency(index); }
    std::shared_ptr<NodeFuseParams> get_fuse_params() const override {
        return std::make_shared<NodeFuseParams>(col_to_im::type_id());
    }
    // No runtime inputs are needed for shape inference: output_size/kernel_size are
    // mirrored as static attributes on the primitive (output_shape/kernel_shape).
    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};

using col_to_im_node = typed_program_node<col_to_im>;

// Primitive-instance specialization for col_to_im: declares layout calculation and debug helpers.
template <>
class typed_primitive_inst<col_to_im> : public typed_primitive_inst_base<col_to_im> {
    using parent = typed_primitive_inst_base<col_to_im>;
    using parent::parent;

public:
    // Dynamic-shape output layout calculation (delegates to core Col2Im shape inference).
    template<typename ShapeType>
    static std::vector<layout> calc_output_layouts(col_to_im_node const& node, kernel_impl_params const& impl_param);
    // Static-shape output layout calculation.
    static layout calc_output_layout(col_to_im_node const& node, kernel_impl_params const& impl_param);

    // JSON-formatted description for debug/log output.
    static std::string to_string(col_to_im_node const& node);

    typed_primitive_inst(network& network, col_to_im_node const& desc);
};

using col_to_im_inst = typed_primitive_inst<col_to_im>;
} // namespace cldnn
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/registry/registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ REGISTER_DEFAULT_IMPLS(adaptive_pooling, OCL_S);
REGISTER_DEFAULT_IMPLS(batch_to_space, OCL_S);
REGISTER_DEFAULT_IMPLS(border, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(bucketize, OCL_S);
REGISTER_DEFAULT_IMPLS(col_to_im, OCL_S);
REGISTER_DEFAULT_IMPLS(custom_gpu_primitive, OCL_S);
REGISTER_DEFAULT_IMPLS(data, COMMON_S, COMMON_D);
REGISTER_DEFAULT_IMPLS(depth_to_space, OCL_S);
Expand Down
Loading
Loading