From ef9c5f66a06a32f2afd266d9d22ff7ab5d26b0c3 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 18 Mar 2025 11:51:47 +0400 Subject: [PATCH 1/3] [GPU] Use src buffer size for copy_to() call when called for two cldnn::memory objects --- .../intel_gpu/include/intel_gpu/runtime/memory.hpp | 2 +- .../tests/unit/module_tests/usm_memory_test.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp index dbb6f86205aa32..13db16d5941533 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp @@ -110,7 +110,7 @@ struct memory { virtual event::ptr copy_to(stream& stream, memory& other, bool blocking = true) const { const auto zero_offset = 0; - const auto data_size = other._bytes_count; + const auto data_size = _bytes_count; return copy_to(stream, other, zero_offset, zero_offset, data_size, blocking); } diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp index caf3fd1dc603c4..da53124f73e207 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp @@ -488,3 +488,15 @@ INSTANTIATE_TEST_SUITE_P(mem_test, mem_test_params{0, 79, 381}, mem_test_params{100, 79, 381}), ::testing::Values(false, true))); + +TEST(mem_test, copy_to_small_to_large) { + auto& ocl_engine = dynamic_cast(get_test_engine()); + auto& stream = get_test_stream(); + auto small_buffer_size = 2048; + auto large_buffer_size = 3072; + + auto small_buffer = ocl_engine.allocate_memory({{small_buffer_size}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false); + auto large_buffer = ocl_engine.allocate_memory({{large_buffer_size}, data_types::u8, format::bfyx}, allocation_type::cl_mem, false); 
+ + OV_ASSERT_NO_THROW(small_buffer->copy_to(stream, *large_buffer, true)); +} From d8c223d25db4c685e81f85b7943a060619d2a83c Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 18 Mar 2025 14:49:52 +0400 Subject: [PATCH 2/3] Add boundaries check macro --- .../intel_gpu/src/runtime/ocl/ocl_memory.cpp | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index d18570704ce4be..ba7df4c70f55d8 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -17,6 +17,23 @@ #include #endif +#define BOUNDARIES_CHECK(copy_type, src_size, src_offset, dst_size, dst_offset, copy_size) \ + OPENVINO_ASSERT(src_offset + copy_size <= src_size && dst_offset + copy_size <= dst_size, \ + "[GPU] Incorrect buffer sizes for ", \ + copy_type, \ + " call. Parameters provided are" \ + ": src_size=", \ + src_size, \ + ", src_offset=", \ + src_offset, \ + ", dst_size=", \ + dst_size, \ + ", dst_offset=", \ + dst_offset, \ + ", copy_size=", \ + copy_size, \ + "."); + #define TRY_CATCH_CL_ERROR(...) \ try { \ __VA_ARGS__; \ @@ -132,6 +149,8 @@ event::ptr gpu_buffer::copy_from(stream& stream, const void* data_ptr, size_t sr if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_buffer::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); + auto cl_stream = downcast(&stream); auto cl_event = blocking ? 
nullptr : &downcast(result_event.get())->get(); auto src_ptr = reinterpret_cast(data_ptr) + src_offset; @@ -146,6 +165,8 @@ event::ptr gpu_buffer::copy_from(stream& stream, const memory& src_mem, size_t s if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_buffer::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); + switch (src_mem.get_allocation_type()) { case allocation_type::usm_host: case allocation_type::usm_shared: @@ -180,6 +201,8 @@ event::ptr gpu_buffer::copy_to(stream& stream, void* data_ptr, size_t src_offset if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_buffer::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); + auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); auto dst_ptr = reinterpret_cast(data_ptr) + dst_offset; @@ -540,6 +563,8 @@ event::ptr gpu_usm::copy_from(stream& stream, const void* data_ptr, size_t src_o if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_usm::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); + auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); auto src_ptr = reinterpret_cast(data_ptr) + src_offset; @@ -555,6 +580,8 @@ event::ptr gpu_usm::copy_from(stream& stream, const memory& src_mem, size_t src_ if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_usm::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); + auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); @@ -586,6 +613,8 @@ event::ptr gpu_usm::copy_to(stream& stream, void* data_ptr, size_t src_offset, s if (size == 0) return result_event; + BOUNDARIES_CHECK("gpu_usm::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); + auto cl_stream = downcast(&stream); auto cl_event = blocking ? 
nullptr : &downcast(result_event.get())->get(); auto src_ptr = reinterpret_cast(buffer_ptr()) + src_offset; @@ -672,3 +701,5 @@ ocl_surfaces_lock::ocl_surfaces_lock(std::vector mem, const stream& } // namespace ocl } // namespace cldnn + +#undef BOUNDARIES_CHECK From c65831f689df529c59ce108cb80d2bbca6599d2e Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 18 Mar 2025 15:58:08 +0400 Subject: [PATCH 3/3] Replace macro with func --- .../intel_gpu/src/runtime/ocl/ocl_memory.cpp | 55 ++++++++++--------- .../unit/module_tests/usm_memory_test.cpp | 2 +- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index ba7df4c70f55d8..a4a7ae5db9bc53 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -17,23 +17,6 @@ #include #endif -#define BOUNDARIES_CHECK(copy_type, src_size, src_offset, dst_size, dst_offset, copy_size) \ - OPENVINO_ASSERT(src_offset + copy_size <= src_size && dst_offset + copy_size <= dst_size, \ - "[GPU] Incorrect buffer sizes for ", \ - copy_type, \ - " call. Parameters provided are" \ - ": src_size=", \ - src_size, \ - ", src_offset=", \ - src_offset, \ - ", dst_size=", \ - dst_size, \ - ", dst_offset=", \ - dst_offset, \ - ", copy_size=", \ - copy_size, \ - "."); - #define TRY_CATCH_CL_ERROR(...) \ try { \ __VA_ARGS__; \ @@ -44,6 +27,30 @@ namespace cldnn { namespace ocl { +static inline void check_boundaries(const std::string& func_str, + size_t src_size, + size_t src_offset, + size_t dst_size, + size_t dst_offset, + size_t copy_size) { + OPENVINO_ASSERT(copy_size <= src_size && src_offset <= src_size - copy_size && copy_size <= dst_size && dst_offset <= dst_size - copy_size, /* subtraction form: offset + copy_size could wrap around size_t and pass the check */ + "[GPU] Incorrect buffer sizes for ", + func_str, + " call. 
", + "Parameters provided are", + ": src_size=", + src_size, + ", src_offset=", + src_offset, + ", dst_size=", + dst_size, + ", dst_offset=", + dst_offset, + ", copy_size=", + copy_size, + "."); +} + static inline cldnn::event::ptr create_event(stream& stream, size_t bytes_count, bool need_user_event) { if (bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip memory operation for 0 size tensor" << std::endl; @@ -149,7 +156,7 @@ event::ptr gpu_buffer::copy_from(stream& stream, const void* data_ptr, size_t sr if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_buffer::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); + check_boundaries("gpu_buffer::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); @@ -165,7 +172,7 @@ event::ptr gpu_buffer::copy_from(stream& stream, const memory& src_mem, size_t s if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_buffer::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); + check_boundaries("gpu_buffer::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); switch (src_mem.get_allocation_type()) { case allocation_type::usm_host: @@ -201,7 +208,7 @@ event::ptr gpu_buffer::copy_to(stream& stream, void* data_ptr, size_t src_offset if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_buffer::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); + check_boundaries("gpu_buffer::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); auto cl_stream = downcast(&stream); auto cl_event = blocking ? 
nullptr : &downcast(result_event.get())->get(); @@ -563,7 +570,7 @@ event::ptr gpu_usm::copy_from(stream& stream, const void* data_ptr, size_t src_o if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_usm::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); + check_boundaries("gpu_usm::copy_from(void*)", SIZE_MAX, src_offset, _bytes_count, dst_offset, size); auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); @@ -580,7 +587,7 @@ event::ptr gpu_usm::copy_from(stream& stream, const memory& src_mem, size_t src_ if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_usm::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); + check_boundaries("gpu_usm::copy_from(memory&)", src_mem.size(), src_offset, _bytes_count, dst_offset, size); auto cl_stream = downcast(&stream); auto cl_event = blocking ? nullptr : &downcast(result_event.get())->get(); @@ -613,7 +620,7 @@ event::ptr gpu_usm::copy_to(stream& stream, void* data_ptr, size_t src_offset, s if (size == 0) return result_event; - BOUNDARIES_CHECK("gpu_usm::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); + check_boundaries("gpu_usm::copy_to(void*)", _bytes_count, src_offset, SIZE_MAX, dst_offset, size); auto cl_stream = downcast(&stream); auto cl_event = blocking ? 
nullptr : &downcast(result_event.get())->get(); @@ -701,5 +708,3 @@ ocl_surfaces_lock::ocl_surfaces_lock(std::vector mem, const stream& } // namespace ocl } // namespace cldnn - -#undef BOUNDARIES_CHECK diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp index da53124f73e207..86c9df3deae32d 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp @@ -489,7 +489,7 @@ INSTANTIATE_TEST_SUITE_P(mem_test, mem_test_params{100, 79, 381}), ::testing::Values(false, true))); -TEST(mem_test, copy_to_small_to_large) { +TEST(mem_test, copy_small_buf_to_large_with_out_of_bound_access) { auto& ocl_engine = dynamic_cast(get_test_engine()); auto& stream = get_test_stream(); auto small_buffer_size = 2048;