[GPU] Use src buffer size for copy_to() call when called for two cldnn::memory objects #29534

Merged: 4 commits, Mar 27, 2025

@@ -110,7 +110,7 @@ struct memory {

virtual event::ptr copy_to(stream& stream, memory& other, bool blocking = true) const {
const auto zero_offset = 0;
- const auto data_size = other._bytes_count;
+ const auto data_size = _bytes_count;
return copy_to(stream, other, zero_offset, zero_offset, data_size, blocking);
}

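The hunk above changes which buffer's byte count sizes the default copy_to(): previously the destination's (other._bytes_count), now the source's own _bytes_count. A minimal sketch of the scenario this fixes, using the same sizes and allocate_memory() call shape as the new unit test at the end of this diff (engine/stream setup assumed):

// Copying a 2048-byte buffer into a 3072-byte buffer: with the old code the
// copy size came from the destination (3072 bytes), reading past the end of
// the 2048-byte source; with this change exactly 2048 bytes are copied.
auto small_buffer = ocl_engine.allocate_memory({{2048}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false);
auto large_buffer = ocl_engine.allocate_memory({{3072}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false);
small_buffer->copy_to(stream, *large_buffer, true);  // blocking copy of small_buffer->size() bytes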
@@ -11,6 +11,7 @@
#include "crop_inst.h"
#include "eltwise_inst.h"
#include "gemm_inst.h"
#include "assign_inst.h"
#include "read_value_inst.h"
#include "reshape_inst.h"
#include "permute_inst.h"
@@ -476,6 +477,10 @@ bool crop_in_place_optimization::match(const program_node& node,
return false;
if (user->is_type<loop>() || user->is_type<non_max_suppression>())
return false;
// Read_value and assign don't handle data paddings internally, thus disable
// crop optimization for now
if (user->is_type<read_value>() || user->is_type<assign>())
return false;
// If the input tensor of convolution includes dynamic padding, there is an issue
// where the total size of tensor is not properly calculated and becomes 0
// It causes issue for internal buffer allocation during runtime
src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp (1 addition, 1 deletion)
@@ -54,7 +54,7 @@ struct assign_impl : public typed_primitive_impl<assign> {

stream.wait_for_events(events);

- const auto ev_set_memory = variable.get_memory()->copy_from(stream, instance.input_memory());
+ const auto ev_set_memory = variable.get_memory()->copy_from(stream, instance.input_memory(), 0, 0, variable.get_layout().bytes_count(), true);
variable.set();

return ev_set_memory;
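For readability, a sketch (not part of the patch) of the explicit copy_from() call above with the argument roles spelled out; the parameter order is assumed from the copy_from(memory&) overloads shown in ocl_memory.cpp below:

const auto ev_set_memory = variable.get_memory()->copy_from(stream,
                                                            instance.input_memory(),
                                                            /*src_offset=*/0,
                                                            /*dst_offset=*/0,
                                                            /*size=*/variable.get_layout().bytes_count(),
                                                            /*blocking=*/true);

Sizing the copy by the variable's layout rather than leaving it implicit keeps the transfer bounded by the variable's buffer, which matches the new boundary checks added in ocl_memory.cpp.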
src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp (36 additions, 0 deletions)
@@ -27,6 +27,30 @@
namespace cldnn {
namespace ocl {

static inline void check_boundaries(size_t src_size,
size_t src_offset,
size_t dst_size,
size_t dst_offset,
size_t copy_size,
const std::string& func_str = "") {
OPENVINO_ASSERT(src_offset + copy_size <= src_size && dst_offset + copy_size <= dst_size,
"[GPU] Incorrect buffer sizes for ",
func_str,
" call. ",
"Parameters provided are",
": src_size=",
src_size,
", src_offset=",
src_offset,
", dst_size=",
dst_size,
", dst_offset=",
dst_offset,
", copy_size=",
copy_size,
".");
}

static inline cldnn::event::ptr create_event(stream& stream, size_t bytes_count, bool need_user_event) {
if (bytes_count == 0) {
GPU_DEBUG_TRACE_DETAIL << "Skip memory operation for 0 size tensor" << std::endl;
@@ -132,6 +156,8 @@ event::ptr gpu_buffer::copy_from(stream& stream, const void* data_ptr, size_t sr
if (size == 0)
return result_event;

check_boundaries(SIZE_MAX, src_offset, _bytes_count, dst_offset, size, "gpu_buffer::copy_from(void*)");

auto cl_stream = downcast<ocl_stream>(&stream);
auto cl_event = blocking ? nullptr : &downcast<ocl_event>(result_event.get())->get();
auto src_ptr = reinterpret_cast<const char*>(data_ptr) + src_offset;
@@ -146,6 +172,8 @@ event::ptr gpu_buffer::copy_from(stream& stream, const memory& src_mem, size_t s
if (size == 0)
return result_event;

check_boundaries(src_mem.size(), src_offset, _bytes_count, dst_offset, size, "gpu_buffer::copy_from(memory&)");

switch (src_mem.get_allocation_type()) {
case allocation_type::usm_host:
case allocation_type::usm_shared:
@@ -180,6 +208,8 @@ event::ptr gpu_buffer::copy_to(stream& stream, void* data_ptr, size_t src_offset
if (size == 0)
return result_event;

check_boundaries(_bytes_count, src_offset, SIZE_MAX, dst_offset, size, "gpu_buffer::copy_to(void*)");

auto cl_stream = downcast<ocl_stream>(&stream);
auto cl_event = blocking ? nullptr : &downcast<ocl_event>(result_event.get())->get();
auto dst_ptr = reinterpret_cast<char*>(data_ptr) + dst_offset;
@@ -540,6 +570,8 @@ event::ptr gpu_usm::copy_from(stream& stream, const void* data_ptr, size_t src_o
if (size == 0)
return result_event;

check_boundaries(SIZE_MAX, src_offset, _bytes_count, dst_offset, size, "gpu_usm::copy_from(void*)");

auto cl_stream = downcast<ocl_stream>(&stream);
auto cl_event = blocking ? nullptr : &downcast<ocl_event>(result_event.get())->get();
auto src_ptr = reinterpret_cast<const char*>(data_ptr) + src_offset;
@@ -555,6 +587,8 @@ event::ptr gpu_usm::copy_from(stream& stream, const memory& src_mem, size_t src_
if (size == 0)
return result_event;

check_boundaries(src_mem.size(), src_offset, _bytes_count, dst_offset, size, "gpu_usm::copy_from(memory&)");

auto cl_stream = downcast<ocl_stream>(&stream);
auto cl_event = blocking ? nullptr : &downcast<ocl_event>(result_event.get())->get();

@@ -586,6 +620,8 @@ event::ptr gpu_usm::copy_to(stream& stream, void* data_ptr, size_t src_offset, s
if (size == 0)
return result_event;

check_boundaries(_bytes_count, src_offset, SIZE_MAX, dst_offset, size, "gpu_usm::copy_to(void*)");

auto cl_stream = downcast<ocl_stream>(&stream);
auto cl_event = blocking ? nullptr : &downcast<ocl_event>(result_event.get())->get();
auto src_ptr = reinterpret_cast<const char*>(buffer_ptr()) + src_offset;
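For reference, a short illustration (not part of the patch) of how the new check_boundaries() helper behaves with the buffer sizes used in the test below; SIZE_MAX is passed for the side backed by a raw host pointer, which effectively disables that bound:

// 3072-byte copy out of a 2048-byte source: 0 + 3072 > 2048, so OPENVINO_ASSERT throws.
check_boundaries(2048, 0, 3072, 0, 3072, "gpu_buffer::copy_from(memory&)");
// 2048-byte copy fits both the 2048-byte source and the 3072-byte destination: passes.
check_boundaries(2048, 0, 3072, 0, 2048, "gpu_buffer::copy_from(memory&)");
// Host-pointer side is unchecked (SIZE_MAX); only the device buffer bound applies: passes.
check_boundaries(SIZE_MAX, 0, 3072, 0, 3072, "gpu_buffer::copy_from(void*)");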
@@ -488,3 +488,15 @@ INSTANTIATE_TEST_SUITE_P(mem_test,
mem_test_params{0, 79, 381},
mem_test_params{100, 79, 381}),
::testing::Values(false, true)));

TEST(mem_test, copy_small_buf_to_large_with_out_of_bound_access) {
auto& ocl_engine = dynamic_cast<ocl::ocl_engine&>(get_test_engine());
auto& stream = get_test_stream();
auto small_buffer_size = 2048;
auto large_buffer_size = 3072;

auto small_buffer = ocl_engine.allocate_memory({{small_buffer_size}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false);
auto large_buffer = ocl_engine.allocate_memory({{large_buffer_size}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false);

OV_ASSERT_NO_THROW(small_buffer->copy_to(stream, *large_buffer, true));
}
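
A natural companion case (hypothetical, not part of this PR) would run the same scenario over USM allocations so that gpu_usm::copy_to() and its new boundary check are exercised as well; it reuses the helpers from the test above:

TEST(mem_test, copy_small_usm_buf_to_large_usm_buf) {
    auto& ocl_engine = dynamic_cast<ocl::ocl_engine&>(get_test_engine());
    auto& stream = get_test_stream();
    auto small_buffer_size = 2048;
    auto large_buffer_size = 3072;

    // usm_device allocations route the copy through gpu_usm::copy_to().
    auto small_buffer = ocl_engine.allocate_memory({{small_buffer_size}, data_types::u8, format::bfyx}, allocation_type::usm_device, false);
    auto large_buffer = ocl_engine.allocate_memory({{large_buffer_size}, data_types::u8, format::bfyx}, allocation_type::usm_device, false);

    // copy_to() now sizes the copy by the source (2048 bytes), so no out-of-bounds access is expected.
    OV_ASSERT_NO_THROW(small_buffer->copy_to(stream, *large_buffer, true));
}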