Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/gauxc/enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ enum class AtomicGridSizeDefault {
* molecular integration
*/
enum class XCWeightAlg {
NOTPARTITIONED, ///< Not partitioned
Becke, ///< The original Becke weighting scheme
SSF, ///< The Stratmann-Scuseria-Frisch weighting scheme
LKO ///< The Lauqua-Kuessman-Ochsenfeld weighting scheme
Expand Down
3 changes: 3 additions & 0 deletions include/gauxc/load_balancer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <gauxc/xc_task.hpp>
#include <gauxc/util/timer.hpp>
#include <gauxc/runtime_environment.hpp>
#include <gauxc/enums.hpp>

namespace GauXC {

Expand All @@ -27,6 +28,8 @@ namespace detail {
struct LoadBalancerState {
bool modified_weights_are_stored = false;
///< Whether the load balancer currently stores partitioned weights
XCWeightAlg weight_alg = XCWeightAlg::NOTPARTITIONED;
///< Weight partitioning scheme used by this LoadBalancer
};


Expand Down
4 changes: 2 additions & 2 deletions include/gauxc/xc_integrator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class XCIntegrator {
exc_vxc_type_gks eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&,
const IntegratorSettingsXC& = IntegratorSettingsXC{});

exc_grad_type eval_exc_grad( const MatrixType& );
exc_grad_type eval_exc_grad( const MatrixType&, const MatrixType& );
exc_grad_type eval_exc_grad( const MatrixType&, const IntegratorSettingsXC& = IntegratorSettingsXC{} );
exc_grad_type eval_exc_grad( const MatrixType&, const MatrixType&, const IntegratorSettingsXC& = IntegratorSettingsXC{} );

exx_type eval_exx ( const MatrixType&,
const IntegratorSettingsEXX& = IntegratorSettingsEXX{} );
Expand Down
8 changes: 4 additions & 4 deletions include/gauxc/xc_integrator/impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,16 @@ typename XCIntegrator<MatrixType>::exc_vxc_type_gks

template <typename MatrixType>
typename XCIntegrator<MatrixType>::exc_grad_type
XCIntegrator<MatrixType>::eval_exc_grad( const MatrixType& P ) {
XCIntegrator<MatrixType>::eval_exc_grad( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {
if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
return pimpl_->eval_exc_grad(P);
return pimpl_->eval_exc_grad(P, ks_settings);
};

template <typename MatrixType>
typename XCIntegrator<MatrixType>::exc_grad_type
XCIntegrator<MatrixType>::eval_exc_grad( const MatrixType& Ps, const MatrixType& Pz ) {
XCIntegrator<MatrixType>::eval_exc_grad( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {
if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
return pimpl_->eval_exc_grad(Ps, Pz);
return pimpl_->eval_exc_grad(Ps, Pz, ks_settings);
};

template <typename MatrixType>
Expand Down
8 changes: 4 additions & 4 deletions include/gauxc/xc_integrator/replicated/impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,27 +159,27 @@ typename ReplicatedXCIntegrator<MatrixType>::exc_vxc_type_gks

template <typename MatrixType>
typename ReplicatedXCIntegrator<MatrixType>::exc_grad_type
ReplicatedXCIntegrator<MatrixType>::eval_exc_grad_( const MatrixType& P ) {
ReplicatedXCIntegrator<MatrixType>::eval_exc_grad_( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {

if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();

std::vector<value_type> EXC_GRAD( 3*pimpl_->load_balancer().molecule().natoms() );
pimpl_->eval_exc_grad( P.rows(), P.cols(), P.data(), P.rows(),
EXC_GRAD.data() );
EXC_GRAD.data(), ks_settings );

return EXC_GRAD;

}

template <typename MatrixType>
typename ReplicatedXCIntegrator<MatrixType>::exc_grad_type
ReplicatedXCIntegrator<MatrixType>::eval_exc_grad_( const MatrixType& Ps, const MatrixType& Pz ) {
ReplicatedXCIntegrator<MatrixType>::eval_exc_grad_( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {

if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();

std::vector<value_type> EXC_GRAD( 3*pimpl_->load_balancer().molecule().natoms() );
pimpl_->eval_exc_grad( Ps.rows(), Ps.cols(), Ps.data(), Ps.rows(), Pz.data(), Pz.rows(),
EXC_GRAD.data() );
EXC_GRAD.data(), ks_settings );

return EXC_GRAD;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ class ReplicatedXCIntegratorImpl {
value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0;

virtual void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp,
value_type* EXC_GRAD ) = 0;
value_type* EXC_GRAD, const IntegratorSettingsXC& ks_settings ) = 0;
virtual void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldps,
const value_type* Pz, int64_t lpdz, value_type* EXC_GRAD ) = 0;
const value_type* Pz, int64_t lpdz, value_type* EXC_GRAD, const IntegratorSettingsXC& ks_settings ) = 0;
virtual void eval_exx_( int64_t m, int64_t n, const value_type* P,
int64_t ldp, value_type* K, int64_t ldk,
const IntegratorSettingsEXX& settings ) = 0;
Expand Down Expand Up @@ -150,9 +150,9 @@ class ReplicatedXCIntegratorImpl {


void eval_exc_grad( int64_t m, int64_t n, const value_type* P, int64_t ldp,
value_type* EXC_GRAD );
value_type* EXC_GRAD, const IntegratorSettingsXC& ks_settings );
void eval_exc_grad( int64_t m, int64_t n, const value_type* Ps, int64_t ldps,
const value_type* Pz, int64_t ldpz, value_type* EXC_GRAD );
const value_type* Pz, int64_t ldpz, value_type* EXC_GRAD, const IntegratorSettingsXC& ks_settings );

void eval_exx( int64_t m, int64_t n, const value_type* P,
int64_t ldp, value_type* K, int64_t ldk,
Expand Down
4 changes: 2 additions & 2 deletions include/gauxc/xc_integrator/replicated_xc_integrator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl<MatrixType> {
exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType&, const IntegratorSettingsXC& ) override;
exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC&) override;
exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override;
exc_grad_type eval_exc_grad_( const MatrixType& ) override;
exc_grad_type eval_exc_grad_( const MatrixType&, const MatrixType& ) override;
exc_grad_type eval_exc_grad_( const MatrixType&, const IntegratorSettingsXC& ) override;
exc_grad_type eval_exc_grad_( const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override;
exx_type eval_exx_ ( const MatrixType&, const IntegratorSettingsEXX& ) override;
fxc_contraction_type_rks eval_fxc_contraction_ ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override;
fxc_contraction_type_uks eval_fxc_contraction_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC&) override;
Expand Down
12 changes: 6 additions & 6 deletions include/gauxc/xc_integrator/xc_integrator_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ class XCIntegratorImpl {
virtual exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) = 0;
virtual exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px,
const IntegratorSettingsXC& ks_settings ) = 0;
virtual exc_grad_type eval_exc_grad_( const MatrixType& P ) = 0;
virtual exc_grad_type eval_exc_grad_( const MatrixType& Ps, const MatrixType& Pz ) = 0;
virtual exc_grad_type eval_exc_grad_( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) = 0;
virtual exc_grad_type eval_exc_grad_( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) = 0;
virtual exx_type eval_exx_ ( const MatrixType& P,
const IntegratorSettingsEXX& settings ) = 0;
virtual fxc_contraction_type_rks eval_fxc_contraction_ ( const MatrixType& P,
Expand Down Expand Up @@ -125,17 +125,17 @@ class XCIntegratorImpl {
* @param[in] P The alpha density matrix
* @returns EXC gradient
*/
exc_grad_type eval_exc_grad( const MatrixType& P ) {
return eval_exc_grad_(P);
exc_grad_type eval_exc_grad( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {
return eval_exc_grad_(P, ks_settings);
}

/** Integrate EXC gradient for UKS
*
* @param[in] P The alpha density matrix
* @returns EXC gradient
*/
exc_grad_type eval_exc_grad( const MatrixType& Ps, const MatrixType& Pz ) {
return eval_exc_grad_(Ps, Pz);
exc_grad_type eval_exc_grad( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {
return eval_exc_grad_(Ps, Pz, ks_settings);
}

/** Integrate Exact Exchange for RHF
Expand Down
4 changes: 4 additions & 0 deletions include/gauxc/xc_integrator_settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@ struct IntegratorSettingsKS : public IntegratorSettingsXC {
double gks_dtol = 1e-12;
};

struct IntegratorSettingsEXC_GRAD : public IntegratorSettingsKS {
bool include_weight_derivatives= true; // whether to include grid weight contribution and employ translational invariance, or just use Hellmann-Feynman gradient
};

}
3 changes: 2 additions & 1 deletion src/molecular_weights/device/device_molecular_weights.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void DeviceMolecularWeights::modify_weights( LoadBalancer& lb ) const {
auto task_comparator = []( const XCTask& a, const XCTask& b ) {
return (a.points.size() * a.bfn_screening.nbe) > (b.points.size() * b.bfn_screening.nbe);
};
std::sort(task_begin, task_end, task_comparator );
std::stable_sort(task_begin, task_end, task_comparator );

const auto& mol = lb.molecule();
const auto natoms = mol.natoms();
Expand Down Expand Up @@ -79,6 +79,7 @@ void DeviceMolecularWeights::modify_weights( LoadBalancer& lb ) const {
rt.device_backend()->master_queue_synchronize();

lb.state().modified_weights_are_stored = true;
lb.state().weight_alg = this->settings_.weight_alg;

}

Expand Down
3 changes: 2 additions & 1 deletion src/molecular_weights/host/host_molecular_weights.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void HostMolecularWeights::modify_weights( LoadBalancer& lb ) const {
auto task_comparator = []( const XCTask& a, const XCTask& b ) {
return (a.points.size() * a.bfn_screening.nbe) > (b.points.size() * b.bfn_screening.nbe);
};
std::sort( tasks.begin(), tasks.end(), task_comparator );
std::stable_sort( tasks.begin(), tasks.end(), task_comparator );

// Modify the weights
const auto& mol = lb.molecule();
Expand All @@ -34,6 +34,7 @@ void HostMolecularWeights::modify_weights( LoadBalancer& lb ) const {
tasks.begin(), tasks.end() );

lb.state().modified_weights_are_stored = true;
lb.state().weight_alg = this->settings_.weight_alg;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace integrator {
template <typename F = double>
constexpr F magic_ssf_factor = 0.64;

constexpr double ssf_weight_tol = 1e-10;
constexpr double ssf_weight_tol = 1e-13;

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ enum class DeviceBlasUplo : unsigned char {
Lower
};

template <typename T>
void increment( device_blas_handle generic_handle, const T* X, T* Y, int N );


template <typename T>
void dot( device_blas_handle handle,
int N,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
namespace GauXC {

void increment_exc_grad_lda( integrator_ks_scheme ks_scheme, size_t nshell, ShellToTaskDevice* shell_to_task,
XCDeviceTask* device_tasks, double* EXC_GRAD, device_queue );
XCDeviceTask* device_tasks, double* EXC_GRAD, bool with_weight_derivatives, device_queue );
void increment_exc_grad_gga( integrator_ks_scheme ks_scheme, size_t nshell, ShellToTaskDevice* shell_to_task,
XCDeviceTask* device_tasks, double* EXC_GRAD, device_queue );
XCDeviceTask* device_tasks, double* EXC_GRAD, bool with_weight_derivatives, device_queue );
void increment_exc_grad_mgga( integrator_ks_scheme ks_scheme, size_t nshell, bool need_lapl, ShellToTaskDevice* shell_to_task,
XCDeviceTask* device_tasks, double* EXC_GRAD, device_queue );
XCDeviceTask* device_tasks, double* EXC_GRAD, bool with_weight_derivatives, device_queue );

}

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "cuda_aos_scheme1.hpp"
#include "device/cuda/cuda_backend.hpp"
#include "cuda_aos_scheme1_weights.hpp"
#include "device/common/device_blas.hpp"

namespace GauXC {

Expand Down Expand Up @@ -37,6 +38,41 @@ void CudaAoSScheme1<Base>::partition_weights( XCDeviceData* _data ) {
scheme1_stack.dist_nearest_device, base_stack.weights_device, *device_backend->master_stream );
}

template <typename Base>
void CudaAoSScheme1<Base>::eval_weight_1st_deriv_contracted( XCDeviceData* _data, XCWeightAlg alg ) {
if( alg != XCWeightAlg::SSF ) {
GAUXC_GENERIC_EXCEPTION("Weight Algorithm NYI for CUDA AoS Scheme1");
}
auto* data = dynamic_cast<Data*>(_data);
if( !data ) GAUXC_BAD_LWD_DATA_CAST();

auto device_backend = dynamic_cast<CUDABackend*>(data->device_backend_);
if( !device_backend ) GAUXC_BAD_BACKEND_CAST();

// make w times f vector
const bool is_UKS = data->allocated_terms.ks_scheme == UKS;
const bool is_GKS = data->allocated_terms.ks_scheme == GKS;
const bool is_pol = is_UKS or is_GKS;
auto base_stack = data->base_stack;
if( is_pol )
increment( data->device_backend_->master_blas_handle(), base_stack.den_z_eval_device,
base_stack.den_s_eval_device, data->total_npts_task_batch );

hadamard_product(data->device_backend_->master_blas_handle(), data->total_npts_task_batch, 1, base_stack.den_s_eval_device, 1,
base_stack.eps_eval_device, 1);


// Compute distances from grid to atomic centers
const auto ldatoms = data->get_ldatoms();
auto static_stack = data->static_stack;
auto scheme1_stack = data->scheme1_stack;
cuda_aos_scheme1_weight_1st_deriv_wrapper( data->total_npts_task_batch, data->global_dims.natoms,
base_stack.points_x_device, base_stack.points_y_device, base_stack.points_z_device,
static_stack.rab_device, ldatoms, static_stack.coords_device,
scheme1_stack.dist_scratch_device, ldatoms, scheme1_stack.iparent_device,
scheme1_stack.dist_nearest_device, base_stack.eps_eval_device, static_stack.exc_grad_device, *device_backend->master_stream );
}


template struct CudaAoSScheme1<AoSScheme1Base>;
#ifdef GAUXC_HAS_MAGMA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct CudaAoSScheme1 : public Base {

// API Overrides
void partition_weights( XCDeviceData* ) override final;
void eval_weight_1st_deriv_contracted( XCDeviceData*, XCWeightAlg ) override final;

std::unique_ptr<XCDeviceData> create_device_data(const DeviceRuntimeEnvironment&) override final;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,28 @@ void cuda_aos_scheme1_weights_wrapper( int32_t npts, int32_t natoms,
iparent, dist_nearest, weights, stream );
#endif

}


void cuda_aos_scheme1_weight_1st_deriv_wrapper(
int32_t npts, int32_t natoms,
const double* points_x, const double* points_y, const double* points_z,
const double* RAB, int32_t ldRAB, const double* coords,
double* dist, int32_t lddist, const int32_t* iparent,
const double* dist_nearest, const double* w_times_f,
double* exc_grad_w, cudaStream_t stream ){

// Compute distances from grid to atomic centers
compute_grid_to_center_dist( npts, natoms, coords, points_x, points_y, points_z,
dist, lddist, stream );

eval_weight_1st_deriv_contracted_ssf_1d( npts, natoms, RAB, ldRAB, coords, points_x, points_y, points_z, dist, lddist,
iparent, dist_nearest, w_times_f, exc_grad_w, stream );

}





}
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ void cuda_aos_scheme1_weights_wrapper( int32_t npts, int32_t natoms,
double* dist, int32_t lddist, const int32_t* iparent,
const double* dist_nearest, double* weights, cudaStream_t stream );

void cuda_aos_scheme1_weight_1st_deriv_wrapper(
int32_t npts, int32_t natoms,
const double* points_x, const double* points_y, const double* points_z,
const double* RAB, int32_t ldRAB, const double* coords,
double* dist, int32_t lddist, const int32_t* iparent,
const double* dist_nearest, const double* w_times_f,
double* exc_grad_w, cudaStream_t stream );

}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,24 @@ void increment( const T* X, T* Y, cudaStream_t stream ) {
increment_kernel<<<1,1,0,stream>>>(X,Y);
}

template <typename T>
__global__ void increment_vec_kernel( const T* X, T* Y, int N ) {
const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
if( tid < N ) Y[tid] += X[tid];
}

template <typename T>
void increment( device_blas_handle generic_handle, const T* X, T* Y, int N) {
const int threads = cuda::warp_size * cuda::max_warps_per_thread_block;
const int blocks = util::div_ceil( N, threads );
cublasHandle_t handle = generic_handle.blas_handle_as<util::cublas_handle>();
auto stream = util::get_stream(handle);
increment_vec_kernel<<<blocks, threads, 0, stream>>>(X,Y,N);
}

template
void increment( device_blas_handle generic_handle, const double* X, double* Y, int N );

template <>
void dot( device_blas_handle generic_handle,
int N,
Expand Down
Loading