Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 66 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ option( EXCHCXX_ENABLE_SYCL "Enable Device Code (SYCL)" OFF )
option( EXCHCXX_ENABLE_LIBXC "Enable Libxc Backend" ON )
option( BUILD_SHARED_LIBS "Build Shared Libs" OFF )

# --- SYCL Architecture Options (mutually exclusive) ---
option(EXCHCXX_SYCL_INTEL_PVC_ARCH "Target Intel PVC (Ponte Vecchio)" OFF)
option(EXCHCXX_SYCL_NV_SM_80_ARCH "Target NVIDIA SM_80 (A100)" OFF)
option(EXCHCXX_SYCL_NV_SM_90_ARCH "Target NVIDIA SM_90 (H100/H200)" OFF)
option(EXCHCXX_SYCL_AMD_GFX90A_ARCH "Target AMD gfx90a (MI200)" OFF)
option(EXCHCXX_SYCL_AMD_GFX942_ARCH "Target AMD gfx942 (MI300A/X)" OFF)


# Decided if we're compiling device bindings
if( EXCHCXX_ENABLE_CUDA OR EXCHCXX_ENABLE_SYCL OR EXCHCXX_ENABLE_HIP )
Expand All @@ -31,9 +38,60 @@ endif()


if(EXCHCXX_ENABLE_SYCL)
# e.g. intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942
set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Alias for -fsycl-targets (see Users Manual)")
endif()
# Allowed alias strings (as consumed by -fsycl-targets)
set(_EXCHCXX_SYCL_ALLOWED
intel_gpu_pvc
nvidia_gpu_sm_80
nvidia_gpu_sm_90
amd_gpu_gfx90a
amd_gpu_gfx942
)

# Collect selections from booleans
set(_EXCHCXX_SYCL_FROM_BOOLEANS "")
if(EXCHCXX_SYCL_INTEL_PVC_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS intel_gpu_pvc)
endif()
if(EXCHCXX_SYCL_NV_SM_80_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_80)
endif()
if(EXCHCXX_SYCL_NV_SM_90_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_90)
endif()
if(EXCHCXX_SYCL_AMD_GFX90A_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx90a)
endif()
if(EXCHCXX_SYCL_AMD_GFX942_ARCH)
list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx942)
endif()

# Resolve the selected target (prefer booleans; fall back to user-provided EXCHCXX_SYCL_TARGET)
set(_EXCHCXX_SELECTED "")
list(LENGTH _EXCHCXX_SYCL_FROM_BOOLEANS _exchcxx_bool_count)
if(_exchcxx_bool_count GREATER 1)
message(FATAL_ERROR
"Multiple SYCL arch options enabled (${_EXCHCXX_SYCL_FROM_BOOLEANS}). Enable exactly one.")
elseif(_exchcxx_bool_count EQUAL 1)
list(GET _EXCHCXX_SYCL_FROM_BOOLEANS 0 _EXCHCXX_SELECTED)
elseif(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "")
set(_EXCHCXX_SELECTED "${EXCHCXX_SYCL_TARGET}") # backward compat
endif()

# Validate & cache the resolved target (or leave empty for JIT)
if(NOT _EXCHCXX_SELECTED STREQUAL "")
list(FIND _EXCHCXX_SYCL_ALLOWED "${_EXCHCXX_SELECTED}" _exchcxx_sycl_idx)
if(_exchcxx_sycl_idx EQUAL -1)
message(FATAL_ERROR
"Invalid SYCL AoT target '${_EXCHCXX_SELECTED}'. "
"Allowed values: ${_EXCHCXX_SYCL_ALLOWED}")
endif()
set(EXCHCXX_SYCL_TARGET "${_EXCHCXX_SELECTED}" CACHE STRING "Resolved SYCL target alias" FORCE)
else()
set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Resolved SYCL target alias (empty => JIT)" FORCE)
message(STATUS "ExchCXX SYCL AoT disabled (no target selected) — will JIT at runtime.")
endif()

endif(EXCHCXX_ENABLE_SYCL)


# Append local cmake directory to find CMAKE Modules
Expand Down Expand Up @@ -75,7 +133,7 @@ if( ${Libxc_FOUND} )
else()

option( FETCHCONTENT_LIBXC_GIT_SHALLOW "Whether to use GIT_SHALLOW for FetchContent'ing libxc" ON )

FetchContent_Declare(
libxc
GIT_REPOSITORY https://gitlab.com/libxc/libxc.git
Expand All @@ -91,8 +149,8 @@ else()

FetchContent_MakeAvailable( libxc )
add_library( Libxc::xc ALIAS xc )
target_include_directories( xc
PUBLIC
target_include_directories( xc
PUBLIC
$<BUILD_INTERFACE:${libxc_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${libxc_BINARY_DIR}/src>
$<BUILD_INTERFACE:${libxc_BINARY_DIR}>
Expand All @@ -104,15 +162,15 @@ else()
set_target_properties(xc PROPERTIES UNITY_BUILD OFF)
message(STATUS "Will disable unity-build for Libxc::xc")
endif()

set( BUILD_TESTING ${OLD_BUILD_TESTING} CACHE BOOL "" FORCE )

endif()

else( EXCHCXX_ENABLE_LIBXC )
set( Libxc_FOUND FALSE )
endif( EXCHCXX_ENABLE_LIBXC )

add_subdirectory( src )

# Testing
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ small subset of XC functionals which may be evaluated either on the host (CPU)
or device (GPU, FPGA, etc). Currently GPU support is provided through the
[CUDA](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) for NVIDIA
GPUs, [HIP](https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html) for
AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental)
for generic accelerator backends (including Intel GPUs).
AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental,
supports only oneAPI implementaion) for generic accelerator backends (including Intel GPUs).


ExchCXX is a work in progress. Its development has been funded by the U.S.
Expand Down
36 changes: 18 additions & 18 deletions include/exchcxx/impl/builtin/kernels/deorbitalized.hpp
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
/**
* ExchCXX
* ExchCXX
*
* Copyright (c) 2020-2024, The Regents of the University of California,
* through Lawrence Berkeley National Laboratory (subject to receipt of
* any required approvals from the U.S. Dept. of Energy).
* any required approvals from the U.S. Dept. of Energy).
*
* Portions Copyright (c) Microsoft Corporation.
*
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* (1) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
*
* (2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* (3) Neither the name of the University of California, Lawrence Berkeley
* National Laboratory, U.S. Dept. of Energy nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Expand All @@ -36,7 +36,7 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* You are under no obligation whatsoever to provide any bug fixes, patches,
* or upgrades to the features, functionality or performance of the source
* code ("Enhancements") to anyone; however, if you choose to make your
Expand All @@ -49,7 +49,7 @@
* in binary and source code form.
*/

#pragma once
#pragma once
#include <exchcxx/impl/builtin/fwd.hpp>
#include <exchcxx/impl/builtin/kernel_type.hpp>
#include <exchcxx/impl/builtin/kernels/screening_interface.hpp>
Expand Down Expand Up @@ -165,9 +165,9 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b,
double& v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb,
double& v2sigma2_ab_ab, double& v2sigma2_ab_bb, double& v2sigma2_bb_bb,
double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a,
double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a,
double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b,
double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a,
double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a,
double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b,
double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb,
double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b,
Expand All @@ -185,8 +185,8 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {

BUILTIN_KERNEL_EVAL_RETURN
eval_fxc_unpolar( double rho, double sigma, double lapl, double tau,
double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau,
double& v2sigma2, double& v2sigmalapl, double& v2sigmatau,
double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau,
double& v2sigma2, double& v2sigmalapl, double& v2sigmatau,
double& v2lapl2, double& v2lapltau, double& v2tau2 ) {
#if defined(__CUDACC__) || defined(__HIPCC__)
printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n");
Expand All @@ -199,8 +199,8 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
}

BUILTIN_KERNEL_EVAL_RETURN
eval_fxc_polar( double rho_a, double rho_b,
double sigma_aa, double sigma_ab, double sigma_bb,
eval_fxc_polar( double rho_a, double rho_b,
double sigma_aa, double sigma_ab, double sigma_bb,
double lapl_a, double lapl_b, double tau_a, double tau_b,
double& v2rho2_aa, double& v2rho2_ab, double& v2rho2_bb,
double& v2rhosigma_a_aa, double& v2rhosigma_a_ab, double& v2rhosigma_a_bb,
Expand All @@ -209,9 +209,9 @@ struct kernel_traits<Deorbitalized<XCEF,KEDF>> {
double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b,
double& v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb,
double& v2sigma2_ab_ab, double& v2sigma2_ab_bb, double& v2sigma2_bb_bb,
double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a,
double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a,
double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b,
double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a,
double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a,
double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b,
double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb,
double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b,
Expand Down
83 changes: 24 additions & 59 deletions include/exchcxx/impl/builtin/util.hpp
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
/**
* ExchCXX
* ExchCXX
*
* Copyright (c) 2020-2024, The Regents of the University of California,
* through Lawrence Berkeley National Laboratory (subject to receipt of
* any required approvals from the U.S. Dept. of Energy).
* any required approvals from the U.S. Dept. of Energy).
*
* Portions Copyright (c) Microsoft Corporation.
*
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* (1) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
*
* (2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* (3) Neither the name of the University of California, Lawrence Berkeley
* National Laboratory, U.S. Dept. of Energy nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Expand All @@ -36,7 +36,7 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* You are under no obligation whatsoever to provide any bug fixes, patches,
* or upgrades to the features, functionality or performance of the source
* code ("Enhancements") to anyone; however, if you choose to make your
Expand All @@ -56,6 +56,14 @@
#include <cmath>
#include <cfloat>

#if defined(__CUDACC__) || defined(__HIPCC__)
#define EXCHCXX_READONLY_TABLE static __device__
#elif defined(__SYCL_DEVICE_ONLY__)
#define EXCHCXX_READONLY_TABLE inline constexpr
#else
#define EXCHCXX_READONLY_TABLE static
#endif

namespace ExchCXX {


Expand Down Expand Up @@ -118,7 +126,7 @@ SAFE_INLINE(auto) erf( T x ) { return sm::erf(x); }
template <typename T, typename U>
SAFE_INLINE(auto) pow( T x, U e ) { return sm::pow(x,e); }
template <typename T>
SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); }
SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); }



Expand All @@ -139,16 +147,8 @@ SAFE_INLINE(auto) xc_cheb_eval(const double x, const double *cs, const int N)

return 0.5*(b0 - b2);
}

// The following data is taken from libxc
#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double AE11_data[39] = {
EXCHCXX_READONLY_TABLE double AE11_data[39] = {
0.121503239716065790, -0.065088778513550150, 0.004897651357459670, -0.000649237843027216, 0.000093840434587471,
0.000000420236380882, -0.000008113374735904, 0.000002804247688663, 0.000000056487164441, -0.000000344809174450,
0.000000058209273578, 0.000000038711426349, -0.000000012453235014, -0.000000005118504888, 0.000000002148771527,
Expand All @@ -159,72 +159,37 @@ double AE11_data[39] = {
-0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double AE12_data[25] = {
EXCHCXX_READONLY_TABLE double AE12_data[25] = {
0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391,
-0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012,
0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602,
-0.000000000015830222, 0.000000000002905732, 0.000000000001769356, 0.000000000000492735, 0.000000000000093709,
0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double E11_data[19] = {
EXCHCXX_READONLY_TABLE double E11_data[19] = {
-16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938,
0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176,
-0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541,
0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double E12_data[16] = {
EXCHCXX_READONLY_TABLE double E12_data[16] = {
-0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022,
0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374,
-0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148,
0.00000000000000000315
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double AE13_data[25] = {
EXCHCXX_READONLY_TABLE double AE13_data[25] = {
-0.605773246640603460, -0.112535243483660900, 0.013432266247902779, -0.001926845187381145, 0.000309118337720603,
-0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870,
0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706,
-0.000000000008733026, 0.000000000002023672, -0.000000000000474132, 0.000000000000112211, -0.000000000000026804,
0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023
};

#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ static
#elif defined(__SYCL_DEVICE_ONLY__)
inline constexpr
#else
static
#endif
double AE14_data[26] = {
EXCHCXX_READONLY_TABLE double AE14_data[26] = {
-0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661,
-0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440,
0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588,
Expand Down
Loading