Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ option( EXCHCXX_ENABLE_SYCL "Enable Device Code (SYCL)" OFF )
option( EXCHCXX_ENABLE_LIBXC "Enable Libxc Backend" ON )
option( BUILD_SHARED_LIBS "Build Shared Libs" OFF )

# --- SYCL Architecture Options (mutually exclusive) ---
option(EXCHCXX_SYCL_INTEL_PVC_ARCH "Target Intel PVC (Ponte Vecchio)" OFF)
option(EXCHCXX_SYCL_NV_SM_80_ARCH "Target NVIDIA SM_80 (A100)" OFF)
option(EXCHCXX_SYCL_NV_SM_90_ARCH "Target NVIDIA SM_90 (H100/H200)" OFF)
option(EXCHCXX_SYCL_AMD_GFX90A_ARCH "Target AMD gfx90a (MI200)" OFF)
option(EXCHCXX_SYCL_AMD_GFX942_ARCH "Target AMD gfx942 (MI300A/X)" OFF)


# Decided if we're compiling device bindings
if( EXCHCXX_ENABLE_CUDA OR EXCHCXX_ENABLE_SYCL OR EXCHCXX_ENABLE_HIP )
Expand All @@ -30,6 +37,63 @@ if( EXCHCXX_ENABLE_SYCL AND EXCHCXX_ENABLE_HIP )
endif()


if(EXCHCXX_ENABLE_SYCL)
# Allowed alias strings (as consumed by -fsycl-targets)
set(_EXCHCXX_SYCL_ALLOWED
intel_gpu_pvc
nvidia_gpu_sm_80
nvidia_gpu_sm_90
amd_gpu_gfx90a
amd_gpu_gfx942
)

# Collect selections from booleans
set(_EXCHCXX_SYCL_FROM_BOOLEANS "")
if(EXCHCXX_SYCL_INTEL_PVC_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS intel_gpu_pvc)
endif()
if(EXCHCXX_SYCL_NV_SM_80_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_80)
endif()
if(EXCHCXX_SYCL_NV_SM_90_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_90)
endif()
if(EXCHCXX_SYCL_AMD_GFX90A_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx90a)
endif()
if(EXCHCXX_SYCL_AMD_GFX942_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx942)
endif()

# Resolve the selected target (prefer booleans; fall back to user-provided EXCHCXX_SYCL_TARGET)
set(_EXCHCXX_SELECTED "")
list(LENGTH _EXCHCXX_SYCL_FROM_BOOLEANS _exchcxx_bool_count)
if(_exchcxx_bool_count GREATER 1)
message(FATAL_ERROR
"Multiple SYCL arch options enabled (${_EXCHCXX_SYCL_FROM_BOOLEANS}). Enable exactly one.")
elseif(_exchcxx_bool_count EQUAL 1)
list(GET _EXCHCXX_SYCL_FROM_BOOLEANS 0 _EXCHCXX_SELECTED)
elseif(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "")
set(_EXCHCXX_SELECTED "${EXCHCXX_SYCL_TARGET}") # backward compat
endif()

# Validate & cache the resolved target (or leave empty for JIT)
if(NOT _EXCHCXX_SELECTED STREQUAL "")
list(FIND _EXCHCXX_SYCL_ALLOWED "${_EXCHCXX_SELECTED}" _exchcxx_sycl_idx)
if(_exchcxx_sycl_idx EQUAL -1)
message(FATAL_ERROR
"Invalid SYCL AoT target '${_EXCHCXX_SELECTED}'. "
"Allowed values: ${_EXCHCXX_SYCL_ALLOWED}")
endif()
set(EXCHCXX_SYCL_TARGET "${_EXCHCXX_SELECTED}" CACHE STRING "Resolved SYCL target alias" FORCE)
else()
set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Resolved SYCL target alias (empty => JIT)" FORCE)
message(STATUS "ExchCXX SYCL AoT disabled (no target selected) — will JIT at runtime.")
endif()

endif(EXCHCXX_ENABLE_SYCL)


# Append local cmake directory to find CMAKE Modules
if( CMAKE_MODULE_PATH )
list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ small subset of XC functionals which may be evaluated either on the host (CPU)
or device (GPU, FPGA, etc). Currently GPU support is provided through the
[CUDA](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) for NVIDIA
GPUs, [HIP](https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html) for
AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental)
for generic accelerator backends (including Intel GPUs).
AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental,
supports only oneAPI implementaion) for generic accelerator backends (including Intel GPUs).


ExchCXX is a work in progress. Its development has been funded by the U.S.
Expand Down
16 changes: 12 additions & 4 deletions include/exchcxx/impl/builtin/kernels/deorbitalized.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,10 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau,
double& v2sigma2, double& v2sigmalapl, double& v2sigmatau,
double& v2lapl2, double& v2lapltau, double& v2tau2 ) {
#if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
#if defined(__CUDACC__) || defined(__HIPCC__)
printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n");
#elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n");
#else
unused(rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2);
throw std::runtime_error("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels");
Expand Down Expand Up @@ -170,8 +172,10 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb,
double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b,
double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) {
#if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
#if defined(__CUDACC__) || defined(__HIPCC__)
printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n");
#elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n");
#else
unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, vrho_a, vrho_b, vsigma_aa, vsigma_ab, vsigma_bb, vlapl_a, vlapl_b, vtau_a, vtau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb);
throw std::runtime_error("eval_vxc_fxc_polar not implemented for deorbitalized kernels");
Expand All @@ -184,8 +188,10 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau,
double& v2sigma2, double& v2sigmalapl, double& v2sigmatau,
double& v2lapl2, double& v2lapltau, double& v2tau2 ) {
#if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
#if defined(__CUDACC__) || defined(__HIPCC__)
printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n");
#elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
sycl::ext::oneapi::experimental::printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n");
#else
unused(rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2);
throw std::runtime_error("eval_fxc_unpolar not implemented for deorbitalized kernels");
Expand All @@ -210,8 +216,10 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb,
double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b,
double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) {
#if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
#if defined(__CUDACC__) || defined(__HIPCC__)
printf("eval_fxc_polar not implemented for deorbitalized kernels\n");
#elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL)
sycl::ext::oneapi::experimental::printf("eval_fxc_polar not implemented for deorbitalized kernels\n");
#else
unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb);
throw std::runtime_error("eval_fxc_polar not implemented for deorbitalized kernels");
Expand Down
38 changes: 14 additions & 24 deletions include/exchcxx/impl/builtin/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@
#include <cmath>
#include <cfloat>

#if defined(__CUDACC__) || defined(__HIPCC__)
#define EXCHCXX_READONLY_TABLE static __device__
#elif defined(__SYCL_DEVICE_ONLY__)
#define EXCHCXX_READONLY_TABLE inline constexpr
#else
#define EXCHCXX_READONLY_TABLE static
#endif

namespace ExchCXX {


Expand Down Expand Up @@ -140,10 +148,7 @@ SAFE_INLINE(auto) xc_cheb_eval(const double x, const double *cs, const int N)
return 0.5*(b0 - b2);
}
// The following data is taken from libxc
#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double AE11_data[39] = {
EXCHCXX_READONLY_TABLE double AE11_data[39] = {
0.121503239716065790, -0.065088778513550150, 0.004897651357459670, -0.000649237843027216, 0.000093840434587471,
0.000000420236380882, -0.000008113374735904, 0.000002804247688663, 0.000000056487164441, -0.000000344809174450,
0.000000058209273578, 0.000000038711426349, -0.000000012453235014, -0.000000005118504888, 0.000000002148771527,
Expand All @@ -154,52 +159,37 @@ static double AE11_data[39] = {
-0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double AE12_data[25] = {
EXCHCXX_READONLY_TABLE double AE12_data[25] = {
0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391,
-0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012,
0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602,
-0.000000000015830222, 0.000000000002905732, 0.000000000001769356, 0.000000000000492735, 0.000000000000093709,
0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double E11_data[19] = {
EXCHCXX_READONLY_TABLE double E11_data[19] = {
-16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938,
0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176,
-0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541,
0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double E12_data[16] = {
EXCHCXX_READONLY_TABLE double E12_data[16] = {
-0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022,
0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374,
-0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148,
0.00000000000000000315
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double AE13_data[25] = {
EXCHCXX_READONLY_TABLE double AE13_data[25] = {
-0.605773246640603460, -0.112535243483660900, 0.013432266247902779, -0.001926845187381145, 0.000309118337720603,
-0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870,
0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706,
-0.000000000008733026, 0.000000000002023672, -0.000000000000474132, 0.000000000000112211, -0.000000000000026804,
0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__
#endif
static double AE14_data[26] = {
EXCHCXX_READONLY_TABLE double AE14_data[26] = {
-0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661,
-0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440,
0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588,
Expand Down
2 changes: 1 addition & 1 deletion include/exchcxx/util/exchcxx_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
#define DEVICE_PARAMS sycl::queue* queue
#define DEVICE_PARAMS_NOTYPE queue

#define SYCL_KERNEL_PARAMS sycl::id<1> idx
#define SYCL_KERNEL_PARAMS sycl::id<1> tid

#endif

Expand Down
Loading