diff --git a/src/gpu/amd/miopen_binary.cpp b/src/gpu/amd/miopen_binary.cpp index dc49689df3d..9ce2069fafd 100644 --- a/src/gpu/amd/miopen_binary.cpp +++ b/src/gpu/amd/miopen_binary.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_binary.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -40,20 +42,21 @@ status_t miopen_binary_t::execute(const exec_ctx_t &ctx) const { auto arg_scale1 = CTX_IN_SYCL_MEMORY(DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_1); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - void *a = arg_src_0.get_native_pointer(ih); - void *b = arg_src_1.get_native_pointer(ih); - void *c = arg_dst.get_native_pointer(ih); - void *s0 = arg_scale0.get_native_pointer(ih); - void *s1 = arg_scale1.get_native_pointer(ih); + void *a = arg_src_0.get_native_pointer(ih); + void *b = arg_src_1.get_native_pointer(ih); + void *c = arg_dst.get_native_pointer(ih); + void *s0 = arg_scale0.get_native_pointer(ih); + void *s1 = arg_scale1.get_native_pointer(ih); - pd()->binary_impl_->execute(handle, a, b, c, s0, s1); - }); + pd()->binary_impl_->execute(handle, a, b, c, s0, s1); + }); }); } diff --git a/src/gpu/amd/miopen_convolution.cpp b/src/gpu/amd/miopen_convolution.cpp index 8f5c47bb07f..fe9e1694261 100644 --- a/src/gpu/amd/miopen_convolution.cpp +++ b/src/gpu/amd/miopen_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_convolution.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -55,25 +57,26 @@ status_t miopen_convolution_fwd_t::execute_convolution( ::sycl::access::mode::read_write>(temp_reorder_mem, cgh); } - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - - std::vector args; - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_weights.get_native_pointer(ih)); - args.push_back(arg_dst.get_native_pointer(ih)); - args.push_back(arg_bias.get_native_pointer(ih)); - args.push_back(arg_scratch.get_native_pointer(ih)); - args.push_back(arg_filter_scratch.get_native_pointer(ih)); - args.push_back(temp_dst.get_native_pointer(ih)); - args.push_back(temp_reorder.get_native_pointer(ih)); - args.push_back(arg_oscale.get_native_pointer(ih)); - - pd()->impl_->execute(handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + + std::vector args; + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_weights.get_native_pointer(ih)); + args.push_back(arg_dst.get_native_pointer(ih)); + args.push_back(arg_bias.get_native_pointer(ih)); + args.push_back(arg_scratch.get_native_pointer(ih)); + args.push_back(arg_filter_scratch.get_native_pointer(ih)); + args.push_back(temp_dst.get_native_pointer(ih)); + args.push_back(temp_reorder.get_native_pointer(ih)); + args.push_back(arg_oscale.get_native_pointer(ih)); + + pd()->impl_->execute(handle, args); + }); }); } @@ -91,22 +94,23 @@ status_t miopen_convolution_bwd_data_t::execute_convolution( auto arg_filter_scratch = CTX_SCRATCH_SYCL_MEMORY( memory_tracking::names::key_conv_miopen_filter); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - - std::vector args; - args.push_back(arg_diff_src.get_native_pointer(ih)); - args.push_back(arg_weights.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_bias.get_native_pointer(ih)); - args.push_back(arg_scratch.get_native_pointer(ih)); - args.push_back(arg_filter_scratch.get_native_pointer(ih)); - - pd()->impl_->execute(handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + + std::vector args; + args.push_back(arg_diff_src.get_native_pointer(ih)); + args.push_back(arg_weights.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_bias.get_native_pointer(ih)); + args.push_back(arg_scratch.get_native_pointer(ih)); + args.push_back(arg_filter_scratch.get_native_pointer(ih)); + + pd()->impl_->execute(handle, args); + }); }); } @@ -118,17 +122,19 @@ status_t miopen_convolution_bwd_weights_t::execute_zero_dims( auto arg_diff_weights = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_WEIGHTS); auto arg_diff_bias = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_BIAS); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - void *weights = arg_diff_weights.get_native_pointer(ih); - void *bias = arg_diff_bias.get_native_pointer(ih); + void *weights = arg_diff_weights.get_native_pointer(ih); + void *bias = arg_diff_bias.get_native_pointer(ih); - pd()->impl_->execute_set_weights_bias(handle, weights, bias, 0.f); - }); + pd()->impl_->execute_set_weights_bias( + handle, weights, bias, 0.f); + }); }); } @@ -152,22 +158,23 @@ status_t miopen_convolution_bwd_weights_t::execute_convolution( arg_diff_bias = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_BIAS); } - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - - std::vector args; - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_diff_weights.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_diff_bias.get_native_pointer(ih)); - args.push_back(arg_scratch.get_native_pointer(ih)); - args.push_back(arg_filter_scratch.get_native_pointer(ih)); - - pd()->impl_->execute(handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + + std::vector args; + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_diff_weights.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_diff_bias.get_native_pointer(ih)); + args.push_back(arg_scratch.get_native_pointer(ih)); + args.push_back(arg_filter_scratch.get_native_pointer(ih)); + + pd()->impl_->execute(handle, args); + }); }); } diff --git a/src/gpu/amd/miopen_deconvolution.cpp b/src/gpu/amd/miopen_deconvolution.cpp index 16f6dbbe822..92d9bdf455b 100644 --- a/src/gpu/amd/miopen_deconvolution.cpp +++ b/src/gpu/amd/miopen_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_deconvolution.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -37,17 +39,18 @@ status_t miopen_deconvolution_bwd_weights_t::execute_bias( auto arg_diff_bias = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_BIAS); auto arg_diff_dst = CTX_IN_SYCL_MEMORY(DNNL_ARG_DIFF_DST); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - void *bias = arg_diff_bias.get_native_pointer(ih); - void *y = arg_diff_dst.get_native_pointer(ih); + void *bias = arg_diff_bias.get_native_pointer(ih); + void *y = arg_diff_dst.get_native_pointer(ih); - impl_->execute_bias(handle, y, bias); - }); + impl_->execute_bias(handle, y, bias); + }); }); } diff --git a/src/gpu/amd/miopen_eltwise.cpp b/src/gpu/amd/miopen_eltwise.cpp index 9bcb8e50fbf..7263cd7306c 100644 --- a/src/gpu/amd/miopen_eltwise.cpp +++ b/src/gpu/amd/miopen_eltwise.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_eltwise.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -36,18 +38,20 @@ status_t miopen_eltwise_fwd_t::execute(const exec_ctx_t &ctx) const { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - std::vector args; - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + std::vector args; + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_dst.get_native_pointer(ih)); + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_dst.get_native_pointer(ih)); - pd()->eltwise_fwd_impl_->execute(handle, args.data(), args.size()); - }); + pd()->eltwise_fwd_impl_->execute( + handle, args.data(), args.size()); + }); }); } @@ -61,19 +65,21 @@ status_t miopen_eltwise_bwd_t::execute(const exec_ctx_t &ctx) const { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_diff_dst = CTX_IN_SYCL_MEMORY(DNNL_ARG_DIFF_DST); auto arg_diff_src = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_SRC); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - std::vector args; - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + std::vector args; + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_diff_src.get_native_pointer(ih)); + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_diff_src.get_native_pointer(ih)); - pd()->eltwise_bwd_impl_->execute(handle, args.data(), args.size()); - }); + pd()->eltwise_bwd_impl_->execute( + handle, args.data(), args.size()); + }); }); } diff --git a/src/gpu/amd/miopen_inner_product.cpp b/src/gpu/amd/miopen_inner_product.cpp index cff00237295..59991e1e89e 100644 --- a/src/gpu/amd/miopen_inner_product.cpp +++ b/src/gpu/amd/miopen_inner_product.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,8 +15,10 @@ * limitations under the License. *******************************************************************************/ -#include "gpu/amd/miopen_inner_product.hpp" +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_gemm_inner_product.hpp" +#include "gpu/amd/miopen_inner_product.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" #include "xpu/sycl/buffer_memory_storage.hpp" @@ -44,28 +46,32 @@ status_t miopen_inner_product_fwd_t::execute(const exec_ctx_t &ctx) const { = CTX_SCRATCH_SYCL_MEMORY(memory_tracking::names::key_none); auto arg_scaled_bias_scratch = CTX_SCRATCH_SYCL_MEMORY( memory_tracking::names::key_conv_adjusted_scales); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto native_stream = hip_stream->get_underlying_stream(); - auto miopen_handle = hip_stream->get_miopen_handle(native_stream); - auto rocblas_handle = hip_stream->get_rocblas_handle(native_stream); - - std::vector args; - - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_wei.get_native_pointer(ih)); - args.push_back(arg_bias.get_native_pointer(ih)); - args.push_back(arg_dst.get_native_pointer(ih)); - args.push_back(arg_ip_scratch.get_native_pointer(ih)); - args.push_back(arg_spacial_scratch.get_native_pointer(ih)); - args.push_back(arg_scaled_bias_scratch.get_native_pointer(ih)); - args.push_back(arg_oscale.get_native_pointer(ih)); - - pd()->inner_product_impl_->execute( - miopen_handle, rocblas_handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto native_stream = hip_stream->get_underlying_stream(); + auto miopen_handle + = hip_stream->get_miopen_handle(native_stream); + auto rocblas_handle + = hip_stream->get_rocblas_handle(native_stream); + + std::vector args; + + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_wei.get_native_pointer(ih)); + args.push_back(arg_bias.get_native_pointer(ih)); + args.push_back(arg_dst.get_native_pointer(ih)); + args.push_back(arg_ip_scratch.get_native_pointer(ih)); + args.push_back(arg_spacial_scratch.get_native_pointer(ih)); + args.push_back( + arg_scaled_bias_scratch.get_native_pointer(ih)); + args.push_back(arg_oscale.get_native_pointer(ih)); + + pd()->inner_product_impl_->execute( + miopen_handle, rocblas_handle, args); + }); }); } @@ -82,25 +88,28 @@ status_t miopen_inner_product_bwd_data_t::execute(const exec_ctx_t &ctx) const { auto arg_spacial_scratch = CTX_SCRATCH_SYCL_MEMORY(memory_tracking::names::key_none); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto native_stream = hip_stream->get_underlying_stream(); - auto miopen_handle = hip_stream->get_miopen_handle(native_stream); - auto rocblas_handle = hip_stream->get_rocblas_handle(native_stream); - - std::vector args; - - args.push_back(arg_diff_src.get_native_pointer(ih)); - args.push_back(arg_wei.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_ip_scratch.get_native_pointer(ih)); - args.push_back(arg_spacial_scratch.get_native_pointer(ih)); - - pd()->inner_product_impl_->execute( - miopen_handle, rocblas_handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto native_stream = hip_stream->get_underlying_stream(); + auto miopen_handle + = hip_stream->get_miopen_handle(native_stream); + auto rocblas_handle + = hip_stream->get_rocblas_handle(native_stream); + + std::vector args; + + args.push_back(arg_diff_src.get_native_pointer(ih)); + args.push_back(arg_wei.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_ip_scratch.get_native_pointer(ih)); + args.push_back(arg_spacial_scratch.get_native_pointer(ih)); + + pd()->inner_product_impl_->execute( + miopen_handle, rocblas_handle, args); + }); }); } @@ -140,25 +149,28 @@ status_t miopen_inner_product_bwd_weights_t::execute( auto arg_spacial_scratch = CTX_SCRATCH_SYCL_MEMORY(memory_tracking::names::key_none); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto native_stream = hip_stream->get_underlying_stream(); - auto miopen_handle = hip_stream->get_miopen_handle(native_stream); - auto rocblas_handle = hip_stream->get_rocblas_handle(native_stream); - std::vector args; - - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_diff_wei.get_native_pointer(ih)); - args.push_back(arg_bias.get_native_pointer(ih)); - args.push_back(arg_ip_scratch.get_native_pointer(ih)); - args.push_back(arg_spacial_scratch.get_native_pointer(ih)); - - pd()->inner_product_impl_->execute( - miopen_handle, rocblas_handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto native_stream = hip_stream->get_underlying_stream(); + auto miopen_handle + = hip_stream->get_miopen_handle(native_stream); + auto rocblas_handle + = hip_stream->get_rocblas_handle(native_stream); + std::vector args; + + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_diff_wei.get_native_pointer(ih)); + args.push_back(arg_bias.get_native_pointer(ih)); + args.push_back(arg_ip_scratch.get_native_pointer(ih)); + args.push_back(arg_spacial_scratch.get_native_pointer(ih)); + + pd()->inner_product_impl_->execute( + miopen_handle, rocblas_handle, args); + }); }); } diff --git a/src/gpu/amd/miopen_lrn.cpp b/src/gpu/amd/miopen_lrn.cpp index 2639e20fa96..ddd2418c843 100644 --- a/src/gpu/amd/miopen_lrn.cpp +++ b/src/gpu/amd/miopen_lrn.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_lrn.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -38,19 +40,20 @@ status_t miopen_lrn_fwd_t::execute(const exec_ctx_t &ctx) const { auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); auto arg_wrksp = CTX_OUT_SYCL_MEMORY(DNNL_ARG_WORKSPACE); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - void *src_ = arg_src.get_native_pointer(ih); - void *dst_ = arg_dst.get_native_pointer(ih); - void *ws_ = arg_wrksp.get_native_pointer(ih); + void *src_ = arg_src.get_native_pointer(ih); + void *dst_ = arg_dst.get_native_pointer(ih); + void *ws_ = arg_wrksp.get_native_pointer(ih); - std::vector args {src_, dst_, ws_}; - pd()->lrn_impl_->execute(handle, args); - }); + std::vector args {src_, dst_, ws_}; + pd()->lrn_impl_->execute(handle, args); + }); }); } @@ -66,20 +69,21 @@ status_t miopen_lrn_bwd_t::execute(const exec_ctx_t &ctx) const { auto arg_diff_src = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_SRC); auto arg_ws = CTX_IN_SYCL_MEMORY(DNNL_ARG_WORKSPACE); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - std::vector args; - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_ws.get_native_pointer(ih)); - args.push_back(arg_diff_src.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - - pd()->lrn_impl_->execute(handle, args); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + std::vector args; + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_ws.get_native_pointer(ih)); + args.push_back(arg_diff_src.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + + pd()->lrn_impl_->execute(handle, args); + }); }); } diff --git a/src/gpu/amd/miopen_matmul_executor.hpp b/src/gpu/amd/miopen_matmul_executor.hpp index dcf48183a42..81ae7fa2ade 100644 --- a/src/gpu/amd/miopen_matmul_executor.hpp +++ b/src/gpu/amd/miopen_matmul_executor.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,8 @@ #ifndef GPU_AMD_MIOPEN_MATMUL_EXECUTOR_HPP #define GPU_AMD_MIOPEN_MATMUL_EXECUTOR_HPP +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/engine.hpp" #include "gpu/amd/miopen_matmul.hpp" #include "gpu/amd/miopen_matmul_impl.hpp" @@ -101,7 +103,8 @@ struct miopen_matmul_scratch_runtime_args_bias_exec_t init_scratch_buffer(scratchpad_size); - return hip_stream->interop_task([=](::sycl::handler &cgh) { + return hip_stream->interop_task([= WA_THIS_COPY_CAPTURE]( + ::sycl::handler &cgh) { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); @@ -126,7 +129,8 @@ struct miopen_matmul_runtime_args_scratch_exec_t init_scratch_buffer(scratchpad_size); - return hip_stream->interop_task([=](::sycl::handler &cgh) { + return hip_stream->interop_task([= WA_THIS_COPY_CAPTURE]( + ::sycl::handler &cgh) { auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); @@ -150,7 +154,8 @@ struct miopen_matmul_runtime_args_bias_exec_t amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { + return hip_stream->interop_task([= WA_THIS_COPY_CAPTURE]( + ::sycl::handler &cgh) { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); @@ -173,20 +178,21 @@ struct miopen_matmul_runtime_args_exec_t : public miopen_matmul_exec_base_t { amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { - auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); - auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); - auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); - - auto arg_bias = xpu::sycl::interop_memory_arg_t< - ::sycl::access::mode::read>(); - auto arg_scratch = xpu::sycl::interop_memory_arg_t< - ::sycl::access::mode::read_write>(); - - interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, arg_src, - arg_dst, /*nullptr*/ arg_bias, - /*nullptr*/ arg_scratch); - }); + return hip_stream->interop_task( + [= WA_THIS_COPY_CAPTURE](::sycl::handler &cgh) { + auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); + auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); + auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); + + auto arg_bias = xpu::sycl::interop_memory_arg_t< + ::sycl::access::mode::read>(); + auto arg_scratch = xpu::sycl::interop_memory_arg_t< + ::sycl::access::mode::read_write>(); + + interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, + arg_src, arg_dst, /*nullptr*/ arg_bias, + /*nullptr*/ arg_scratch); + }); } }; @@ -198,17 +204,18 @@ struct miopen_matmul_bias_scratch_exec_t : public miopen_matmul_exec_base_t { amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { - auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); - auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); - auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); - auto arg_bias = CTX_IN_SYCL_MEMORY(DNNL_ARG_BIAS); - auto arg_scratch = CTX_SCRATCH_SYCL_MEMORY( - memory_tracking::names::key_matmul_dst_in_acc_dt); - - interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, arg_src, - arg_dst, arg_bias, arg_scratch); - }); + return hip_stream->interop_task( + [= WA_THIS_COPY_CAPTURE](::sycl::handler &cgh) { + auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); + auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); + auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); + auto arg_bias = CTX_IN_SYCL_MEMORY(DNNL_ARG_BIAS); + auto arg_scratch = CTX_SCRATCH_SYCL_MEMORY( + memory_tracking::names::key_matmul_dst_in_acc_dt); + + interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, + arg_src, arg_dst, arg_bias, arg_scratch); + }); } }; @@ -220,7 +227,8 @@ struct miopen_matmul_scratch_exec_t : public miopen_matmul_exec_base_t { amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { + return hip_stream->interop_task([= WA_THIS_COPY_CAPTURE]( + ::sycl::handler &cgh) { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); @@ -244,7 +252,8 @@ struct miopen_matmul_bias_exec_t : public miopen_matmul_exec_base_t { amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { + return hip_stream->interop_task([= WA_THIS_COPY_CAPTURE]( + ::sycl::handler &cgh) { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); @@ -267,20 +276,21 @@ struct miopen_matmul_exec_t : public miopen_matmul_exec_base_t { amd::stream_t *hip_stream = utils::downcast(ctx.stream()); - return hip_stream->interop_task([=](::sycl::handler &cgh) { - auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); - auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); - auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); - - auto arg_bias = xpu::sycl::interop_memory_arg_t< - ::sycl::access::mode::read>(); - auto arg_scratch = xpu::sycl::interop_memory_arg_t< - ::sycl::access::mode::read_write>(); - - interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, arg_src, - arg_dst, /*nullptr*/ arg_bias, - /*nullptr*/ arg_scratch); - }); + return hip_stream->interop_task( + [= WA_THIS_COPY_CAPTURE](::sycl::handler &cgh) { + auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); + auto arg_wt = CTX_IN_SYCL_MEMORY(DNNL_ARG_WEIGHTS); + auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); + + auto arg_bias = xpu::sycl::interop_memory_arg_t< + ::sycl::access::mode::read>(); + auto arg_scratch = xpu::sycl::interop_memory_arg_t< + ::sycl::access::mode::read_write>(); + + interop_task(matmul_impl_, engine, cgh, hip_stream, arg_wt, + arg_src, arg_dst, /*nullptr*/ arg_bias, + /*nullptr*/ arg_scratch); + }); } }; diff --git a/src/gpu/amd/miopen_pooling.cpp b/src/gpu/amd/miopen_pooling.cpp index 6963c94f45c..a56deb2adc1 100644 --- a/src/gpu/amd/miopen_pooling.cpp +++ b/src/gpu/amd/miopen_pooling.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,8 +15,10 @@ * limitations under the License. *******************************************************************************/ -#include "gpu/amd/miopen_pooling.hpp" +#include "common/compiler_workarounds.hpp" + #include "common/nstl.hpp" +#include "gpu/amd/miopen_pooling.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" #include "xpu/sycl/buffer_memory_storage.hpp" @@ -76,16 +78,17 @@ status_t miopen_pooling_fwd_t::execute(const exec_ctx_t &ctx) const { auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); auto arg_wkspace = CTX_OUT_SYCL_MEMORY(DNNL_ARG_WORKSPACE); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - void *x = arg_src.get_native_pointer(ih); - void *y = arg_dst.get_native_pointer(ih); - void *ws = arg_wkspace.get_native_pointer(ih); - pd()->pooling_impl_->execute(handle, x, y, ws); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + void *x = arg_src.get_native_pointer(ih); + void *y = arg_dst.get_native_pointer(ih); + void *ws = arg_wkspace.get_native_pointer(ih); + pd()->pooling_impl_->execute(handle, x, y, ws); + }); }); } @@ -105,17 +108,18 @@ status_t miopen_pooling_bwd_t::execute(const exec_ctx_t &ctx) const { auto arg_diff_dst = CTX_IN_SYCL_MEMORY(DNNL_ARG_DIFF_DST); auto arg_wkspace = CTX_IN_SYCL_MEMORY(DNNL_ARG_WORKSPACE); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - void *dx = arg_diff_src.get_native_pointer(ih); - void *dy = arg_diff_dst.get_native_pointer(ih); - void *ws = arg_wkspace.get_native_pointer(ih); - - pd()->pooling_impl_->execute(handle, dx, dy, ws); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + void *dx = arg_diff_src.get_native_pointer(ih); + void *dy = arg_diff_dst.get_native_pointer(ih); + void *ws = arg_wkspace.get_native_pointer(ih); + + pd()->pooling_impl_->execute(handle, dx, dy, ws); + }); }); } diff --git a/src/gpu/amd/miopen_reduction.cpp b/src/gpu/amd/miopen_reduction.cpp index 4d97fdca75f..a9b7cb5c256 100644 --- a/src/gpu/amd/miopen_reduction.cpp +++ b/src/gpu/amd/miopen_reduction.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_reduction.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -38,17 +40,18 @@ status_t miopen_reduction_t::execute(const exec_ctx_t &ctx) const { auto arg_scratch = CTX_SCRATCH_SYCL_MEMORY(memory_tracking::names::key_none); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); - - void *a = arg_src.get_native_pointer(ih); - void *c = arg_dst.get_native_pointer(ih); - void *scratch = arg_scratch.get_native_pointer(ih); - pd()->reduction_impl_->execute(handle, a, c, scratch); - }); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); + + void *a = arg_src.get_native_pointer(ih); + void *c = arg_dst.get_native_pointer(ih); + void *scratch = arg_scratch.get_native_pointer(ih); + pd()->reduction_impl_->execute(handle, a, c, scratch); + }); }); } diff --git a/src/gpu/amd/miopen_reorder.cpp b/src/gpu/amd/miopen_reorder.cpp index d51b1ee1299..0b831ac5bd2 100644 --- a/src/gpu/amd/miopen_reorder.cpp +++ b/src/gpu/amd/miopen_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_reorder.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -39,25 +41,26 @@ status_t miopen_reorder_t::execute(const exec_ctx_t &ctx) const { auto arg_dst_scale = CTX_IN_SYCL_MEMORY(DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + auto handle = hip_stream->get_miopen_handle(); - void *src_ = arg_src.get_native_pointer(ih); - void *dst_ = arg_dst.get_native_pointer(ih); + void *src_ = arg_src.get_native_pointer(ih); + void *dst_ = arg_dst.get_native_pointer(ih); - auto a = static_cast(src_) - + pd()->reorder_->src_offset_in_bytes(); - auto b = static_cast(dst_) - + pd()->reorder_->dst_offset_in_bytes(); + auto a = static_cast(src_) + + pd()->reorder_->src_offset_in_bytes(); + auto b = static_cast(dst_) + + pd()->reorder_->dst_offset_in_bytes(); - void *src_sc = arg_src_scale.get_native_pointer(ih); - void *dst_sc = arg_dst_scale.get_native_pointer(ih); + void *src_sc = arg_src_scale.get_native_pointer(ih); + void *dst_sc = arg_dst_scale.get_native_pointer(ih); - pd()->reorder_->execute(handle, a, b, src_sc, dst_sc); - }); + pd()->reorder_->execute(handle, a, b, src_sc, dst_sc); + }); }); } diff --git a/src/gpu/amd/miopen_softmax.cpp b/src/gpu/amd/miopen_softmax.cpp index a142216cc00..3c1b09083bf 100644 --- a/src/gpu/amd/miopen_softmax.cpp +++ b/src/gpu/amd/miopen_softmax.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2022 Codeplay Software Limited * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ * limitations under the License. *******************************************************************************/ +#include "common/compiler_workarounds.hpp" + #include "gpu/amd/miopen_softmax.hpp" #include "gpu/amd/stream.hpp" #include "gpu/amd/sycl_hip_scoped_context.hpp" @@ -36,19 +38,21 @@ status_t miopen_softmax_fwd_t::execute(const exec_ctx_t &ctx) const { auto arg_src = CTX_IN_SYCL_MEMORY(DNNL_ARG_SRC); auto arg_dst = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DST); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - std::vector args; - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + std::vector args; + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + auto handle = hip_stream->get_miopen_handle(); - args.push_back(arg_src.get_native_pointer(ih)); - args.push_back(arg_dst.get_native_pointer(ih)); + args.push_back(arg_src.get_native_pointer(ih)); + args.push_back(arg_dst.get_native_pointer(ih)); - pd()->softmax_impl_->execute(handle, args.data(), args.size()); - }); + pd()->softmax_impl_->execute( + handle, args.data(), args.size()); + }); }); } @@ -63,20 +67,22 @@ status_t miopen_softmax_bwd_t::execute(const exec_ctx_t &ctx) const { auto arg_diff_dst = CTX_IN_SYCL_MEMORY(DNNL_ARG_DIFF_DST); auto arg_diff_src = CTX_OUT_SYCL_MEMORY(DNNL_ARG_DIFF_SRC); - compat::host_task(cgh, [=](const compat::interop_handle &ih) { - std::vector args; - auto &sycl_engine - = *utils::downcast(hip_stream->engine()); - auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); + compat::host_task(cgh, + [= WA_THIS_COPY_CAPTURE](const compat::interop_handle &ih) { + std::vector args; + auto &sycl_engine = *utils::downcast( + hip_stream->engine()); + auto sc = hip_sycl_scoped_context_handler_t(sycl_engine); - auto handle = hip_stream->get_miopen_handle(); + auto handle = hip_stream->get_miopen_handle(); - args.push_back(arg_dst.get_native_pointer(ih)); - args.push_back(arg_diff_dst.get_native_pointer(ih)); - args.push_back(arg_diff_src.get_native_pointer(ih)); + args.push_back(arg_dst.get_native_pointer(ih)); + args.push_back(arg_diff_dst.get_native_pointer(ih)); + args.push_back(arg_diff_src.get_native_pointer(ih)); - pd()->softmax_impl_->execute(handle, args.data(), args.size()); - }); + pd()->softmax_impl_->execute( + handle, args.data(), args.size()); + }); }); }