diff --git a/src/Ewald.cpp b/src/Ewald.cpp index de23f1bc7..d97d192cc 100644 --- a/src/Ewald.cpp +++ b/src/Ewald.cpp @@ -1530,8 +1530,7 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords, CallBoxForceReciprocalGPU( ff.particles->getCUDAVars(), atomForceRec, molForceRec, particleCharge, particleMol, particleHasNoCharge, particleUsed, startMol, lengthMol, - ff.alpha[box], ff.alphaSq[box], constValue, imageSizeRef[box], - molCoords, currentAxes, box); + constValue, imageSizeRef[box], molCoords, currentAxes, box); delete[] particleUsed; #else // molecule iterator diff --git a/src/FFParticle.cpp b/src/FFParticle.cpp index db35b589d..11e169b8b 100644 --- a/src/FFParticle.cpp +++ b/src/FFParticle.cpp @@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie, double diElectric_1 = 1.0 / forcefield.dielectric; InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind, forcefield.isMartini, count, forcefield.rCut, - forcefield.rCutCoulomb, forcefield.rCutLow, - forcefield.rswitch, forcefield.alpha, forcefield.ewald, - diElectric_1); + forcefield.rCutSq, forcefield.rCutCoulomb, + forcefield.rCutCoulombSq, forcefield.rCutLow, + forcefield.rswitch, forcefield.alpha, forcefield.alphaSq, + forcefield.ewald, diElectric_1); #endif } diff --git a/src/Forcefield.h b/src/Forcefield.h index 85ddcd5d5..64ac8f6ba 100644 --- a/src/Forcefield.h +++ b/src/Forcefield.h @@ -54,12 +54,12 @@ class Forcefield { double tolerance; // Ewald sum terms double rswitch; // Switch distance double dielectric; // dielectric for martini - double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions + double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions double sc_alpha; // Free energy parameter double sc_sigma, sc_sigma_6; // Free energy parameter bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction - bool electrostatic, ewald; // To consider columb interaction + bool electrostatic, ewald; // To consider coulomb interaction bool vdwGeometricSigma; // For sigma combining rule bool isMartini; bool exp6; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 5812426b6..164091427 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -450,8 +450,8 @@ void CallBoxForceReciprocalGPU( const std::vector &particleMol, const std::vector &particleHasNoCharge, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, double alphaSq, double constValue, uint imageSize, - XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) { + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box) { int atomCount = atomForceRec.Count(); int molCount = molForceRec.Count(); double *gpu_particleCharge; @@ -518,13 +518,13 @@ void CallBoxForceReciprocalGPU( vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz, vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz, gpu_particleCharge, gpu_particleMol, gpu_particleHasNoCharge, - gpu_particleUsed, gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue, - imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box], - vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z, - vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box], - vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + gpu_particleUsed, gpu_startMol, gpu_lengthMol, vars->gpu_alpha, + vars->gpu_alphaSq, constValue, imageSize, vars->gpu_kxRef[box], + vars->gpu_kyRef[box], vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, + vars->gpu_z, vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], + vars->gpu_sumInew[box], vars->gpu_isFraction, vars->gpu_molIndex, + vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box, atomCount); cudaDeviceSynchronize(); @@ -558,7 +558,7 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double alpha, double alphaSq, double constValue, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, @@ -627,11 +627,11 @@ __global__ void BoxForceReciprocalGPU( gpu_Invcell_z); dist = sqrt(distSq); - double expConstValue = exp(-1.0 * alphaSq * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq); double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(alpha * dist) / dist) - constValue * expConstValue); + intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 16a3fc532..80062586b 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CALCULATE_EWALD_CUDA_KERNEL -#define CALCULATE_EWALD_CUDA_KERNEL +#ifndef CALCULATE_EWALD_CUDA_KERNEL_H +#define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -23,8 +23,6 @@ void CallBoxForceReciprocalGPU(VariablesCUDA *vars, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, - double alphaSq, double constValue, uint imageSize, XYZArray const &molCoords, @@ -103,8 +101,8 @@ __global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, bool *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol, - double alpha, - double alphaSq, + double *gpu_alpha, + double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, @@ -190,4 +188,4 @@ __global__ void BoxReciprocalGPU(double *gpu_prefact, int imageSize); #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_EWALD_CUDA_KERNEL*/ +#endif /*CALCULATE_EWALD_CUDA_KERNEL_H*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index c827a803d..219a6cafb 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,7 +125,7 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, @@ -302,7 +302,7 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], @@ -469,7 +469,7 @@ __global__ void BoxInterForceGPU( double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, - double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, @@ -577,7 +577,7 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); @@ -608,7 +608,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, @@ -708,7 +708,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box], + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); } @@ -868,12 +868,12 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - int index, double gpu_sigmaSq, + double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -886,20 +886,20 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -909,13 +909,13 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -928,20 +928,20 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -950,13 +950,13 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,20 +968,20 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = erfc(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -990,12 +990,12 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } @@ -1009,11 +1009,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } else { return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } @@ -1021,13 +1021,14 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -1053,7 +1054,7 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, @@ -1061,7 +1062,7 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } if (sc_coul) { @@ -1075,21 +1076,21 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * CalcCoulombVirSwitchGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_rCut); + gpu_alpha, gpu_alphaSq, gpu_rCut); } } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_rCut) { + double gpu_alphaSq, double gpu_rCut) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 29d7953db..072939c24 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CALCULATE_FORCE_CUDA_KERNEL -#define CALCULATE_FORCE_CUDA_KERNEL +#ifndef CALCULATE_FORCE_CUDA_KERNEL_H +#define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -114,6 +114,7 @@ __global__ void BoxForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, @@ -183,6 +184,7 @@ __global__ void BoxInterForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, @@ -249,32 +251,32 @@ __device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, //ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, @@ -286,18 +288,18 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut); //VDW Calculation @@ -359,7 +361,7 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald, int gpu_isMartini, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq, @@ -377,25 +379,25 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int index = FlatIndexGPU(kind1, kind2, gpu_count); if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); @@ -403,4 +405,4 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_FORCE_CUDA_KERNEL*/ +#endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu index eda129319..e32aa3098 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cu +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu @@ -31,9 +31,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, - int isMartini, int count, double Rcut, - double const *rCutCoulomb, double RcutLow, double Ron, - double const *alpha, int ewald, double diElectric_1) { + int isMartini, int count, double Rcut, double RcutSq, + double const *rCutCoulomb, double const *rCutCoulombSq, + double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1) { int countSq = count * count; CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double)); CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double)); @@ -42,14 +43,17 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, CUMALLOC((void **)&vars.gpu_isMartini, sizeof(int)); CUMALLOC((void **)&vars.gpu_count, sizeof(int)); CUMALLOC((void **)&vars.gpu_rCut, sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutSq, sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutCoulomb, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutCoulombSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutLow, sizeof(double)); CUMALLOC((void **)&vars.gpu_rOn, sizeof(double)); CUMALLOC((void **)&vars.gpu_alpha, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_alphaSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_ewald, sizeof(int)); CUMALLOC((void **)&vars.gpu_diElectric_1, sizeof(double)); - // allocate gpu memory for lambda variables + // allocate GPU memory for lambda variables CUMALLOC((void **)&vars.gpu_molIndex, (int)BOX_TOTAL * sizeof(int)); CUMALLOC((void **)&vars.gpu_lambdaVDW, (int)BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_lambdaCoulomb, (int)BOX_TOTAL * sizeof(double)); @@ -65,13 +69,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_count, &count, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCut, &Rcut, sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutSq, &RcutSq, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutCoulomb, rCutCoulomb, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutCoulombSq, rCutCoulombSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutLow, &RcutLow, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rOn, &Ron, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_alpha, alpha, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_alphaSq, alphaSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double), cudaMemcpyHostToDevice); diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 68eafb71b..3584e46c2 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL -#define CONSTANT_DEFINITIONS_CUDA_KERNEL +#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H +#define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -25,9 +25,9 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, int isMartini, int count, - double Rcut, double const *rCutCoulomb, - double RcutLow, double Ron, double const *alpha, - int ewald, double diElectric_1); + double Rcut, double RcutSq, double const *rCutCoulomb, + double const *rCutCoulombSq, double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1); void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber, uint maxAtomsInMol, uint maxMolNumber); void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal); @@ -47,4 +47,4 @@ void DestroyExp6CUDAVars(VariablesCUDA *vars); void DestroyCUDAVars(VariablesCUDA *vars); #endif /*GOMC_CUDA*/ -#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL*/ +#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL_H*/ diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh index 687b6e2c6..00b63e272 100644 --- a/src/GPU/VariablesCUDA.cuh +++ b/src/GPU/VariablesCUDA.cuh @@ -65,10 +65,13 @@ public: gpu_isMartini = NULL; gpu_count = NULL; gpu_rCut = NULL; + gpu_rCutSq = NULL; gpu_rCutLow = NULL; gpu_rOn = NULL; gpu_alpha = NULL; + gpu_alphaSq = NULL; gpu_rCutCoulomb = NULL; + gpu_rCutCoulombSq = NULL; gpu_ewald = NULL; gpu_diElectric_1 = NULL; gpu_aForcex = NULL; @@ -92,11 +95,11 @@ public: int *gpu_isMartini; int *gpu_count; int *gpu_startAtomIdx; //start atom index of the molecule - double *gpu_rCut; - double *gpu_rCutCoulomb; + double *gpu_rCut, *gpu_rCutSq; + double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow; double *gpu_rOn; - double *gpu_alpha; + double *gpu_alpha, *gpu_alphaSq; int *gpu_ewald; double *gpu_diElectric_1; double *gpu_x, *gpu_y, *gpu_z;