diff --git a/common/defines.h b/common/defines.h index 55194aff..8359b0a5 100644 --- a/common/defines.h +++ b/common/defines.h @@ -166,4 +166,12 @@ enum {C=0,N=1,O=2,H=3,XX=4,P=5,S=6}; // see "bond_index" in the "AD4.1_bound.da #define SYCL_ATOMICS_MEM_SCOPE sycl::memory_scope::device #endif +#define SYCL_INTEL_KERNEL_ARGS_RESTRICT + +#ifdef SYCL_INTEL_KERNEL_ARGS_RESTRICT + #define SYCL_KERNEL_ARGS_RESTRICT [[intel::kernel_args_restrict]] +#else + #define SYCL_KERNEL_ARGS_RESTRICT +#endif + #endif /* DEFINES_H_ */ diff --git a/dpcpp/kernel1.dp.cpp b/dpcpp/kernel1.dp.cpp index 6fa89a42..c241e204 100644 --- a/dpcpp/kernel1.dp.cpp +++ b/dpcpp/kernel1.dp.cpp @@ -88,7 +88,8 @@ void gpu_calc_initpop( sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threadsPerBlock), sycl::range<3>(1, 1, threadsPerBlock)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> item_ct1) + SYCL_KERNEL_ARGS_RESTRICT { gpu_calc_initpop_kernel( pConformations_current, pEnergies_current, item_ct1, *cData_ptr_ct1, diff --git a/dpcpp/kernel2.dp.cpp b/dpcpp/kernel2.dp.cpp index c091083e..066bc9fa 100644 --- a/dpcpp/kernel2.dp.cpp +++ b/dpcpp/kernel2.dp.cpp @@ -79,7 +79,8 @@ void gpu_sum_evals(uint32_t blocks, uint32_t threadsPerBlock) sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threadsPerBlock), sycl::range<3>(1, 1, threadsPerBlock)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> item_ct1) + SYCL_KERNEL_ARGS_RESTRICT { gpu_sum_evals_kernel( item_ct1, *cData_ptr_ct1, sSum_evals_acc_ct1.get_pointer()); diff --git a/dpcpp/kernel3.dp.cpp b/dpcpp/kernel3.dp.cpp index 25b0f51f..d8e09506 100644 --- a/dpcpp/kernel3.dp.cpp +++ b/dpcpp/kernel3.dp.cpp @@ -418,7 +418,8 @@ void gpu_perform_LS( sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threads), sycl::range<3>(1, 1, threads)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> item_ct1) + SYCL_KERNEL_ARGS_RESTRICT { gpu_perform_LS_kernel( pMem_conformations_next, pMem_energies_next, item_ct1, dpct_local_acc_ct1.get_pointer(), diff --git a/dpcpp/kernel4.dp.cpp b/dpcpp/kernel4.dp.cpp index 7547e103..75732b80 100644 --- a/dpcpp/kernel4.dp.cpp +++ b/dpcpp/kernel4.dp.cpp @@ -465,8 +465,9 @@ void gpu_gen_and_eval_newpops( sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threadsPerBlock), sycl::range<3>(1, 1, threadsPerBlock)), - [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [=](sycl::nd_item<3> item_ct1) + [[intel::reqd_sub_group_size(32)]] + SYCL_KERNEL_ARGS_RESTRICT { gpu_gen_and_eval_newpops_kernel( pMem_conformations_current, pMem_energies_current, pMem_conformations_next, diff --git a/dpcpp/kernel_ad.dp.cpp b/dpcpp/kernel_ad.dp.cpp index 7c7ee6a5..08305c5e 100644 --- a/dpcpp/kernel_ad.dp.cpp +++ b/dpcpp/kernel_ad.dp.cpp @@ -492,7 +492,8 @@ void gpu_gradient_minAD( sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threads), sycl::range<3>(1, 1, threads)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> item_ct1) + SYCL_KERNEL_ARGS_RESTRICT { gpu_gradient_minAD_kernel( pMem_conformations_next, pMem_energies_next, item_ct1, dpct_local_acc_ct1.get_pointer(), diff --git a/dpcpp/kernel_adam.dp.cpp b/dpcpp/kernel_adam.dp.cpp index fe9c7b6a..560950c0 100644 --- a/dpcpp/kernel_adam.dp.cpp +++ b/dpcpp/kernel_adam.dp.cpp @@ -486,7 +486,8 @@ void gpu_gradient_minAdam( sycl::nd_range<3>(sycl::range<3>(1, 1, blocks) * sycl::range<3>(1, 1, threads), sycl::range<3>(1, 1, threads)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> item_ct1) + SYCL_KERNEL_ARGS_RESTRICT { gpu_gradient_minAdam_kernel( pMem_conformations_next, pMem_energies_next, item_ct1, dpct_local_acc_ct1.get_pointer(),