From 21f20ce877a6ac3aa7483714c3cad50975b94a56 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Tue, 14 May 2024 10:14:53 -0400 Subject: [PATCH 01/16] Extend and optimize implementation of the XYZ operators --- lib/BasicTypes.h | 63 +++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/lib/BasicTypes.h b/lib/BasicTypes.h index 2b9713e00..18097d20c 100644 --- a/lib/BasicTypes.h +++ b/lib/BasicTypes.h @@ -97,17 +97,18 @@ inline void record_debug(uint *x, uint len, std::string filename, //****************************************************************************** -typedef unsigned int uint; -typedef unsigned long int ulong; - -#define UNUSED(x) (void)(x) - -// single XYZ for use as a temporary and return type -struct XYZ { - double x, y, z; - +// single XYZ coordinate for use as a temporary and return type +class XYZ { +public: XYZ() : x(0.0), y(0.0), z(0.0) {} XYZ(double xVal, double yVal, double zVal) : x(xVal), y(yVal), z(zVal) {} + + friend inline std::ostream &operator<<(std::ostream &stream, const XYZ &p); + + inline double getX() const { return x; } + inline double getY() const { return y; } + inline double getZ() const { return z; } + void Reset() { x = y = z = 0.0; } XYZ &operator=(XYZ const &rhs) { x = rhs.x; @@ -115,26 +116,19 @@ struct XYZ { z = rhs.z; return *this; } - bool operator==(XYZ const &rhs) { - if (x == rhs.x && y == rhs.y && z == rhs.z) - return true; - return false; + inline bool operator==(XYZ const &rhs) const { + return (x == rhs.x && y == rhs.y && z == rhs.z); } - bool operator!=(XYZ const &rhs) { - if (x != rhs.x || y != rhs.y || z != rhs.z) - return true; - return false; + inline bool operator!=(XYZ const &rhs) const { + return (x != rhs.x || y != rhs.y || z != rhs.z); } - bool operator<(XYZ const &rhs) { - if (x < rhs.x && y < rhs.y && z < rhs.z) - return true; - return false; + inline bool operator<(XYZ const &rhs) const { + return (x < rhs.x && y < rhs.y && z < rhs.z); } - bool operator>(XYZ const &rhs) { - if (x > rhs.x || y > rhs.y || z > rhs.z) - return true; - return false; + inline bool operator>(XYZ const &rhs) const { + return (x > rhs.x || y > rhs.y || z > rhs.z); } + XYZ &operator+=(XYZ const &rhs) { x += rhs.x; y += rhs.y; @@ -167,14 +161,14 @@ struct XYZ { return *this; } - XYZ operator+(XYZ const &rhs) const { return XYZ(*this) += rhs; } - XYZ operator-(XYZ const &rhs) const { return XYZ(*this) -= rhs; } - XYZ operator*(XYZ const &rhs) const { return XYZ(*this) *= rhs; } - XYZ operator/(XYZ const &rhs) const { return XYZ(*this) /= rhs; } + XYZ operator+(XYZ const &rhs) const { return XYZ(x+rhs.x, y+rhs.y, z+rhs.z); } + XYZ operator-(XYZ const &rhs) const { return XYZ(x-rhs.x, y-rhs.y, z-rhs.z); } + XYZ operator*(XYZ const &rhs) const { return XYZ(x*rhs.x, y*rhs.y, z*rhs.z); } + XYZ operator/(XYZ const &rhs) const { return XYZ(x/rhs.x, y/rhs.y, z/rhs.z); } - XYZ operator*(const double a) const { return XYZ(*this) *= a; } + XYZ operator*(const double a) const { return XYZ(x*a, y*a, z*a); } - XYZ operator-() const { return XYZ(*this) * -1.0; } + XYZ operator-() const { return XYZ(-x, -y, -z); } void Inverse() { x = 1.0 / x; @@ -182,11 +176,11 @@ struct XYZ { z = 1.0 / z; } - double Length() const { return sqrt(LengthSq()); } double LengthSq() const { return x * x + y * y + z * z; } + double Length() const { return sqrt(LengthSq()); } XYZ &Normalize() { - *this *= (1 / Length()); + *this *= (1.0 / Length()); return *this; } @@ -207,6 +201,9 @@ struct XYZ { m = z; return m; } + +public: + double x, y, z; }; inline std::ostream &operator<<(std::ostream &stream, const XYZ &p) { From 3c7b0bbf37a29086fc5cc016409dcd157d47193b Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Fri, 12 Jul 2024 08:39:34 -0400 Subject: [PATCH 02/16] Fix calls to Mersenne Twister for CBMC functions --- src/cbmc/DCFreeCycle.cpp | 12 ++++++++++-- src/cbmc/DCFreeCycleSeed.cpp | 12 ++++++++++-- src/cbmc/DCFreeHedron.cpp | 12 ++++++++++-- src/cbmc/DCFreeHedronSeed.cpp | 12 ++++++++++-- src/cbmc/DCRotateCOM.cpp | 12 ++++++++++-- 5 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/cbmc/DCFreeCycle.cpp b/src/cbmc/DCFreeCycle.cpp index e12557a19..5dc4eb307 100644 --- a/src/cbmc/DCFreeCycle.cpp +++ b/src/cbmc/DCFreeCycle.cpp @@ -139,9 +139,13 @@ void DCFreeCycle::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -217,9 +221,13 @@ void DCFreeCycle::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeCycleSeed.cpp b/src/cbmc/DCFreeCycleSeed.cpp index 2ed2829fd..f57a2aced 100644 --- a/src/cbmc/DCFreeCycleSeed.cpp +++ b/src/cbmc/DCFreeCycleSeed.cpp @@ -138,9 +138,13 @@ void DCFreeCycleSeed::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -216,9 +220,13 @@ void DCFreeCycleSeed::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeHedron.cpp b/src/cbmc/DCFreeHedron.cpp index f13c03e0a..89aa59efc 100644 --- a/src/cbmc/DCFreeHedron.cpp +++ b/src/cbmc/DCFreeHedron.cpp @@ -118,9 +118,13 @@ void DCFreeHedron::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -196,9 +200,13 @@ void DCFreeHedron::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCFreeHedronSeed.cpp b/src/cbmc/DCFreeHedronSeed.cpp index 6907994c1..b6d527069 100644 --- a/src/cbmc/DCFreeHedronSeed.cpp +++ b/src/cbmc/DCFreeHedronSeed.cpp @@ -117,9 +117,13 @@ void DCFreeHedronSeed::BuildNew(TrialMol &newMol, uint molIndex) { positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0)); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 0;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); @@ -195,9 +199,13 @@ void DCFreeHedronSeed::BuildOld(TrialMol &oldMol, uint molIndex) { positions[hed.NumBond()].Add(0, -center); // counting backward to preserve prototype + double u1, u2, u3; for (uint lj = nLJTrials; lj-- > 1;) { // convert chosen torsion to 3D positions - RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + u1 = prng(); + u2 = prng(); + u3 = prng(); + RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3); for (uint b = 0; b < hed.NumBond() + 1; ++b) { // find positions positions[b].Set(lj, spin.Apply(positions[b][0])); diff --git a/src/cbmc/DCRotateCOM.cpp b/src/cbmc/DCRotateCOM.cpp index ff112b468..19471438f 100644 --- a/src/cbmc/DCRotateCOM.cpp +++ b/src/cbmc/DCRotateCOM.cpp @@ -182,7 +182,11 @@ void DCRotateCOM::BuildNew(TrialMol &newMol, uint molIndex) { RandRotateZ(); } else { // convert chosen torsion to 3D positions - spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + double u1, u2, u3; + u1 = prng(); + u2 = prng(); + u3 = prng(); + spin = RotationMatrix::UniformRandom(u1, u2, u3); } for (uint a = 0; a < atomNumber; ++a) { @@ -287,7 +291,11 @@ void DCRotateCOM::BuildOld(TrialMol &oldMol, uint molIndex) { RandRotateZ(); } else { // convert chosen torsion to 3D positions - spin = RotationMatrix::UniformRandom(prng(), prng(), prng()); + double u1, u2, u3; + u1 = prng(); + u2 = prng(); + u3 = prng(); + spin = RotationMatrix::UniformRandom(u1, u2, u3); } for (uint a = 0; a < atomNumber; ++a) { From 124ff9895a88f9217eb3a51be8448672879bad65 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Wed, 7 Aug 2024 11:20:41 -0400 Subject: [PATCH 03/16] Fix grammar in comment --- src/FFSetup.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index fded99d3c..605c30ee5 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -380,7 +380,8 @@ void Dihedral::Read(Reader ¶m, std::string const &firstVar) { if (index == 0) { // set phase shift for n=0 to 90 degree // We will have C0 = Kchi (1 + cos(0 * phi + 90)) = Kchi - //this avoids double counting the C0 (constant offset) term, which is used force fields like TraPPE + // this avoids double counting the C0 (constant offset) term, which is used + // in force fields like TraPPE def = 90.00; } Add(merged, coeff, index, def); From 6b2a8eaec59fd0856fffd8f735a85c4256cc150b Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Wed, 7 Aug 2024 12:02:13 -0400 Subject: [PATCH 04/16] Remove duplicate dihedrals with the same periodicity and issue a warning --- src/FFSetup.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index 605c30ee5..18b8d2b50 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -389,6 +389,17 @@ void Dihedral::Read(Reader ¶m, std::string const &firstVar) { } void Dihedral::Add(std::string const &merged, const double coeff, const uint index, const double def) { + // Check for (and skip) duplicate periodicities for the same dihedral + bool duplicate = false; + for (auto it = n[merged].begin(); it != n[merged].end(); ++it) { + duplicate |= *it == index; + } + + if (duplicate) { + std::cout << "Warning: Skipping duplicate periodicity of " << index + << " for dihedral " << merged << "!\n"; + return; + } ++countTerms; Kchi[merged].push_back(EnConvIfCHARMM(coeff)); n[merged].push_back(index); From 02bfcf67796972e7042a8e2496a0869a92e44ed1 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Thu, 8 Aug 2024 15:45:13 -0400 Subject: [PATCH 05/16] Generate error instead of warning if duplicates have different parameters --- src/FFSetup.cpp | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index 18b8d2b50..654147623 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -19,6 +19,7 @@ along with this program, also can be found at // For Exotic style parameter header error checking #include +const double EPSILON = 0.001; const uint FFSetup::CHARMM_ALIAS_IDX = 0; const uint FFSetup::EXOTIC_ALIAS_IDX = 1; const std::string FFSetup::paramFileAlias[] = {"CHARMM-Style parameter file", @@ -390,15 +391,27 @@ void Dihedral::Read(Reader ¶m, std::string const &firstVar) { void Dihedral::Add(std::string const &merged, const double coeff, const uint index, const double def) { // Check for (and skip) duplicate periodicities for the same dihedral - bool duplicate = false; + // Generate an error and terminate if the duplicate dihedrals have different + // parameters + auto Kchi_it = Kchi[merged].begin(); + auto delta_it = delta[merged].begin(); for (auto it = n[merged].begin(); it != n[merged].end(); ++it) { - duplicate |= *it == index; - } - - if (duplicate) { - std::cout << "Warning: Skipping duplicate periodicity of " << index - << " for dihedral " << merged << "!\n"; - return; + // Found a duplicate dihedral + if (*it == index) { + if (std::fabs(*Kchi_it - EnConvIfCHARMM(coeff)) > EPSILON || + std::fabs(*delta_it - geom::DegToRad(def)) > EPSILON) { + std::cout << "Error: Inconsistent Dihedral parameters were found in " + "parameter file for dihedral " + << merged << " with periodicity " << index << "!\n"; + exit(EXIT_FAILURE); + } else { + std::cout << "Warning: Skipping duplicate periodicity of " << index + << " for dihedral " << merged << "!\n"; + return; + } + } + Kchi_it++; + delta_it++; } ++countTerms; Kchi[merged].push_back(EnConvIfCHARMM(coeff)); From 61d039817ea32262ea72365ed0744a260c4b1287 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Wed, 21 Aug 2024 13:44:33 -0400 Subject: [PATCH 06/16] Also check for duplicate Bonds, Angles, etc. and minor code cleanup --- src/FFSetup.cpp | 34 +++++++++++++++++++--------------- src/FFSetup.h | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index 654147623..33251c57a 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -19,7 +19,7 @@ along with this program, also can be found at // For Exotic style parameter header error checking #include -const double EPSILON = 0.001; +const double EPSILON = 1.0e-4; const uint FFSetup::CHARMM_ALIAS_IDX = 0; const uint FFSetup::EXOTIC_ALIAS_IDX = 1; const std::string FFSetup::paramFileAlias[] = {"CHARMM-Style parameter file", @@ -186,6 +186,11 @@ std::string FFBase::ReadKind(Reader ¶m, std::string const &firstKindName) { param.file >> tmp; merged += tmp; } + // Skip duplicates unless we allow multiple entries, such as for dihedrals + if (!multi && std::find(name.begin(), name.end(), merged) != name.end()) + std::cout << "Warning: Ignoring duplicate entry of " << merged + << ". Using first entry!\n"; + // Might insert duplicates but they will be ignored during execution if (!multi || std::find(name.begin(), name.end(), merged) == name.end()) name.push_back(merged); return merged; @@ -237,7 +242,7 @@ void Particle::Read(Reader ¶m, std::string const &firstVar) { // computation. But need the exponents to be large enough that the arithmetic // or geometric mean of any pair > 6.0. See FFParticle::Blend() for underlying // math. - double smallVal = 1e-20; + double smallVal = 1.0e-20; if (std::fabs(e) < smallVal) { e = 0.0; expN = 12.0; // Set to default (LJ) exponent. @@ -247,12 +252,11 @@ void Particle::Read(Reader ¶m, std::string const &firstVar) { expN_1_4 = 12.0; // Set to default (LJ) exponent. } if ((expN - 6.0) < smallVal) { - std::cout << "ERROR: Mie exponent must be > 6!" << std::endl; + std::cout << "ERROR: Mie exponent must be > 6!\n"; exit(EXIT_FAILURE); } if ((expN_1_4 - 6.0) < smallVal) { - std::cout << "ERROR: Mie exponent for 1-4 interactions must be > 6!" - << std::endl; + std::cout << "ERROR: Mie exponent for 1-4 interactions must be > 6!\n"; exit(EXIT_FAILURE); } @@ -400,13 +404,13 @@ void Dihedral::Add(std::string const &merged, const double coeff, if (*it == index) { if (std::fabs(*Kchi_it - EnConvIfCHARMM(coeff)) > EPSILON || std::fabs(*delta_it - geom::DegToRad(def)) > EPSILON) { - std::cout << "Error: Inconsistent Dihedral parameters were found in " - "parameter file for dihedral " - << merged << " with periodicity " << index << "!\n"; + std::cout << "Error: Inconsistent dihedral parameters were found in " + << "parameter file for dihedral " << merged << " with " + << "periodicity " << index << "!\n"; exit(EXIT_FAILURE); } else { - std::cout << "Warning: Skipping duplicate periodicity of " << index - << " for dihedral " << merged << "!\n"; + std::cout << "Warning: Ignoring duplicate periodicity of " << index + << " for dihedral " << merged << ". Using first entry!\n"; return; } } @@ -424,7 +428,7 @@ void Improper::Read(Reader ¶m, std::string const &firstVar) { uint index; std::string merged = ReadKind(param, firstVar); // If new value - if (validname(merged) == true) { + if (validname(merged)) { param.file >> coeff >> index >> def; if (!param.file.good()) { std::cout << "Error: Incomplete Improper parameters was found in " @@ -445,7 +449,7 @@ void CMap::Read(Reader ¶m, std::string const &firstVar) { uint index; std::string merged = ReadKind(param, firstVar); // If new value - if (validname(merged) == true) { + if (validname(merged)) { param.file >> coeff >> index >> def; if (!param.file.good()) { std::cout << "Error: Incomplete Improper parameters was found in " @@ -455,19 +459,19 @@ void CMap::Read(Reader ¶m, std::string const &firstVar) { Add(coeff, def); } } -// Currently dummy method, exact same as improper +// Currently dummy method, exactly the same as improper void CMap::Add(const double coeff, const double def) { Komega.push_back(EnConvIfCHARMM(coeff)); omega0.push_back(def); } -// Currently dummy method, exact same as improper +// Currently dummy method, exactly the same as improper void HBond::Read(Reader ¶m, std::string const &firstVar) { double coeff, def; uint index; std::string merged = ReadKind(param, firstVar); // If new value - if (validname(merged) == true) { + if (validname(merged)) { param.file >> coeff >> index >> def; if (!param.file.good()) { std::cout << "Error: Incomplete Improper parameters was found in " diff --git a/src/FFSetup.h b/src/FFSetup.h index e7bd1f5a4..7a046dbe7 100644 --- a/src/FFSetup.h +++ b/src/FFSetup.h @@ -40,7 +40,7 @@ class FFBase : public SearchableBase { std::string ReadKind(Reader ¶m, std::string const &firstKindName); std::vector &getnamelist() { return name; } - bool validname(std::string &merged) { + bool validname(const std::string &merged) const { return (std::count(name.begin(), name.end(), merged) > 0); } std::string getname(const uint i) const { return name[i]; } From 34c80aba412fe7a9dc388b85e61e1fb8e3e12af3 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Fri, 23 Aug 2024 19:35:41 -0400 Subject: [PATCH 07/16] Enhance patch to print filename in warnings --- src/FFSetup.cpp | 20 +++++++++++--------- src/FFSetup.h | 4 ++-- src/Reader.h | 2 ++ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index 33251c57a..c724622e6 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -187,9 +187,10 @@ std::string FFBase::ReadKind(Reader ¶m, std::string const &firstKindName) { merged += tmp; } // Skip duplicates unless we allow multiple entries, such as for dihedrals - if (!multi && std::find(name.begin(), name.end(), merged) != name.end()) - std::cout << "Warning: Ignoring duplicate entry of " << merged - << ". Using first entry!\n"; + if (!multi && std::find(name.begin(), name.end(), merged) != name.end()) { + std::cout << "Warning: Ignoring duplicate entry for " << merged << " in " + << param.getFileName().c_str() << ". Using first entry!\n"; + } // Might insert duplicates but they will be ignored during execution if (!multi || std::find(name.begin(), name.end(), merged) == name.end()) name.push_back(merged); @@ -389,11 +390,11 @@ void Dihedral::Read(Reader ¶m, std::string const &firstVar) { // in force fields like TraPPE def = 90.00; } - Add(merged, coeff, index, def); + Add(param.getFileName(), merged, coeff, index, def); last = merged; } -void Dihedral::Add(std::string const &merged, const double coeff, - const uint index, const double def) { +void Dihedral::Add(const std::string &fileName, const std::string &merged, + const double coeff, const uint index, const double def) { // Check for (and skip) duplicate periodicities for the same dihedral // Generate an error and terminate if the duplicate dihedrals have different // parameters @@ -405,12 +406,13 @@ void Dihedral::Add(std::string const &merged, const double coeff, if (std::fabs(*Kchi_it - EnConvIfCHARMM(coeff)) > EPSILON || std::fabs(*delta_it - geom::DegToRad(def)) > EPSILON) { std::cout << "Error: Inconsistent dihedral parameters were found in " - << "parameter file for dihedral " << merged << " with " - << "periodicity " << index << "!\n"; + << fileName.c_str() << " for dihedral " << merged + << " with periodicity " << index << "!\n"; exit(EXIT_FAILURE); } else { std::cout << "Warning: Ignoring duplicate periodicity of " << index - << " for dihedral " << merged << ". Using first entry!\n"; + << " for dihedral " << merged << " in " + << fileName.c_str() << ". Using first entry!\n"; return; } } diff --git a/src/FFSetup.h b/src/FFSetup.h index 7a046dbe7..ca34608ed 100644 --- a/src/FFSetup.h +++ b/src/FFSetup.h @@ -146,8 +146,8 @@ class Dihedral : public ReadableBaseWithFirst, public FFBase { public: Dihedral(void) : FFBase(4, true), last(""), countTerms(0) {} virtual void Read(Reader ¶m, std::string const &firstVar); - void Add(std::string const &merged, const double coeff, const uint index, - const double def); + void Add(const std::string &fileName, const std::string &merged, + const double coeff, const uint index, const double def); uint getTerms() const { return countTerms; } uint append(std::string &s, double *Kchi_in, double *delta_in, uint *n_in, uint count) const { diff --git a/src/Reader.h b/src/Reader.h index 28c4e28b9..5775114bf 100644 --- a/src/Reader.h +++ b/src/Reader.h @@ -32,6 +32,7 @@ inline std::ifstream &operator>>(std::ifstream &is, bool &val) { class Reader { public: + friend inline std::ifstream &operator>>(std::ifstream &is, bool &val); Reader(std::string const &name, std::string const &alias, bool useSkipW = false, std::string const *const skipW = NULL, bool useSkipC = false, std::string const *const skipC = NULL, @@ -98,6 +99,7 @@ class Reader { } bool Read(std::string &firstItem); + std::string getFileName() const { return fileName; }; // Make public to expose object. std::ifstream file; From 0dd9bcdbe5a2b486baf27a4133a8c6390840ceb6 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Fri, 23 Aug 2024 21:38:33 -0400 Subject: [PATCH 08/16] Update new warning messages --- src/FFSetup.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index c724622e6..8b60a5eee 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -189,7 +189,7 @@ std::string FFBase::ReadKind(Reader ¶m, std::string const &firstKindName) { // Skip duplicates unless we allow multiple entries, such as for dihedrals if (!multi && std::find(name.begin(), name.end(), merged) != name.end()) { std::cout << "Warning: Ignoring duplicate entry for " << merged << " in " - << param.getFileName().c_str() << ". Using first entry!\n"; + << param.getFileName().c_str() << ". Using first entry.\n"; } // Might insert duplicates but they will be ignored during execution if (!multi || std::find(name.begin(), name.end(), merged) == name.end()) @@ -412,7 +412,7 @@ void Dihedral::Add(const std::string &fileName, const std::string &merged, } else { std::cout << "Warning: Ignoring duplicate periodicity of " << index << " for dihedral " << merged << " in " - << fileName.c_str() << ". Using first entry!\n"; + << fileName.c_str() << ". Using first entry.\n"; return; } } From a3a7c1c0d793e5b3a978e8cedd6fc802ce56aa7e Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Sat, 24 Aug 2024 17:31:27 -0400 Subject: [PATCH 09/16] Update warning message for duplicated dihedrals --- src/FFSetup.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp index 8b60a5eee..9f4388b08 100644 --- a/src/FFSetup.cpp +++ b/src/FFSetup.cpp @@ -411,8 +411,8 @@ void Dihedral::Add(const std::string &fileName, const std::string &merged, exit(EXIT_FAILURE); } else { std::cout << "Warning: Ignoring duplicate periodicity of " << index - << " for dihedral " << merged << " in " - << fileName.c_str() << ". Using first entry.\n"; + << " for dihedral " << merged << " in " << fileName.c_str() + << ".\n"; return; } } From f332038f50960447c20faf07338fc011acafe1e9 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Fri, 11 Oct 2024 22:04:33 -0400 Subject: [PATCH 10/16] Fix log message for Intra-MEMC1 move --- src/ConfigSetup.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp index 8eb1b2536..5e087604d 100644 --- a/src/ConfigSetup.cpp +++ b/src/ConfigSetup.cpp @@ -836,7 +836,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) { } else if (CheckString(line[0], "IntraMEMC-1Freq")) { if (stringtod(line[1]) > 0.0) { sys.moves.intraMemc = stringtod(line[1]); - printf("%-40s %-4.4f \n", "Info: IntraMEMC-2 move frequency", + printf("%-40s %-4.4f \n", "Info: IntraMEMC-1 move frequency", sys.moves.intraMemc); sys.intraMemcVal.enable = true; sys.intraMemcVal.MEMC1 = true; From 8f05a7fb177eab5132f5fd798b21251b58ae0ab9 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Mon, 28 Oct 2024 06:54:44 -0400 Subject: [PATCH 11/16] Explicitly call std library sqrt function --- lib/BasicTypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/BasicTypes.h b/lib/BasicTypes.h index 18097d20c..8907ec45d 100644 --- a/lib/BasicTypes.h +++ b/lib/BasicTypes.h @@ -177,7 +177,7 @@ class XYZ { } double LengthSq() const { return x * x + y * y + z * z; } - double Length() const { return sqrt(LengthSq()); } + double Length() const { return std::sqrt(LengthSq()); } XYZ &Normalize() { *this *= (1.0 / Length()); From 9563e139ed3323933cf3067c01ab9f0577898aa7 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Thu, 12 Dec 2024 21:47:34 -0500 Subject: [PATCH 12/16] Pass more forcefield parameters to the GPU for consistency --- src/Ewald.cpp | 3 +- src/FFParticle.cpp | 7 ++- src/Forcefield.h | 4 +- src/GPU/CalculateEwaldCUDAKernel.cu | 24 ++++---- src/GPU/CalculateEwaldCUDAKernel.cuh | 12 ++-- src/GPU/CalculateForceCUDAKernel.cu | 71 ++++++++++++----------- src/GPU/CalculateForceCUDAKernel.cuh | 40 +++++++------ src/GPU/ConstantDefinitionsCUDAKernel.cu | 17 ++++-- src/GPU/ConstantDefinitionsCUDAKernel.cuh | 12 ++-- src/GPU/VariablesCUDA.cuh | 9 ++- 10 files changed, 106 insertions(+), 93 deletions(-) diff --git a/src/Ewald.cpp b/src/Ewald.cpp index de23f1bc7..d97d192cc 100644 --- a/src/Ewald.cpp +++ b/src/Ewald.cpp @@ -1530,8 +1530,7 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords, CallBoxForceReciprocalGPU( ff.particles->getCUDAVars(), atomForceRec, molForceRec, particleCharge, particleMol, particleHasNoCharge, particleUsed, startMol, lengthMol, - ff.alpha[box], ff.alphaSq[box], constValue, imageSizeRef[box], - molCoords, currentAxes, box); + constValue, imageSizeRef[box], molCoords, currentAxes, box); delete[] particleUsed; #else // molecule iterator diff --git a/src/FFParticle.cpp b/src/FFParticle.cpp index db35b589d..11e169b8b 100644 --- a/src/FFParticle.cpp +++ b/src/FFParticle.cpp @@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie, double diElectric_1 = 1.0 / forcefield.dielectric; InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind, forcefield.isMartini, count, forcefield.rCut, - forcefield.rCutCoulomb, forcefield.rCutLow, - forcefield.rswitch, forcefield.alpha, forcefield.ewald, - diElectric_1); + forcefield.rCutSq, forcefield.rCutCoulomb, + forcefield.rCutCoulombSq, forcefield.rCutLow, + forcefield.rswitch, forcefield.alpha, forcefield.alphaSq, + forcefield.ewald, diElectric_1); #endif } diff --git a/src/Forcefield.h b/src/Forcefield.h index 85ddcd5d5..64ac8f6ba 100644 --- a/src/Forcefield.h +++ b/src/Forcefield.h @@ -54,12 +54,12 @@ class Forcefield { double tolerance; // Ewald sum terms double rswitch; // Switch distance double dielectric; // dielectric for martini - double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions + double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions double sc_alpha; // Free energy parameter double sc_sigma, sc_sigma_6; // Free energy parameter bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction - bool electrostatic, ewald; // To consider columb interaction + bool electrostatic, ewald; // To consider coulomb interaction bool vdwGeometricSigma; // For sigma combining rule bool isMartini; bool exp6; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 5812426b6..164091427 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -450,8 +450,8 @@ void CallBoxForceReciprocalGPU( const std::vector &particleMol, const std::vector &particleHasNoCharge, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, double alphaSq, double constValue, uint imageSize, - XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) { + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box) { int atomCount = atomForceRec.Count(); int molCount = molForceRec.Count(); double *gpu_particleCharge; @@ -518,13 +518,13 @@ void CallBoxForceReciprocalGPU( vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz, vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz, gpu_particleCharge, gpu_particleMol, gpu_particleHasNoCharge, - gpu_particleUsed, gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue, - imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box], - vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z, - vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box], - vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + gpu_particleUsed, gpu_startMol, gpu_lengthMol, vars->gpu_alpha, + vars->gpu_alphaSq, constValue, imageSize, vars->gpu_kxRef[box], + vars->gpu_kyRef[box], vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, + vars->gpu_z, vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], + vars->gpu_sumInew[box], vars->gpu_isFraction, vars->gpu_molIndex, + vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box, atomCount); cudaDeviceSynchronize(); @@ -558,7 +558,7 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double alpha, double alphaSq, double constValue, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, @@ -627,11 +627,11 @@ __global__ void BoxForceReciprocalGPU( gpu_Invcell_z); dist = sqrt(distSq); - double expConstValue = exp(-1.0 * alphaSq * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq); double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(alpha * dist) / dist) - constValue * expConstValue); + intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 16a3fc532..80062586b 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CALCULATE_EWALD_CUDA_KERNEL -#define CALCULATE_EWALD_CUDA_KERNEL +#ifndef CALCULATE_EWALD_CUDA_KERNEL_H +#define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -23,8 +23,6 @@ void CallBoxForceReciprocalGPU(VariablesCUDA *vars, const bool *particleUsed, const std::vector &startMol, const std::vector &lengthMol, - double alpha, - double alphaSq, double constValue, uint imageSize, XYZArray const &molCoords, @@ -103,8 +101,8 @@ __global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, bool *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol, - double alpha, - double alphaSq, + double *gpu_alpha, + double *gpu_alphaSq, double constValue, int imageSize, double *gpu_kx, @@ -190,4 +188,4 @@ __global__ void BoxReciprocalGPU(double *gpu_prefact, int imageSize); #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_EWALD_CUDA_KERNEL*/ +#endif /*CALCULATE_EWALD_CUDA_KERNEL_H*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index c827a803d..219a6cafb 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,7 +125,7 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, @@ -302,7 +302,7 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], @@ -469,7 +469,7 @@ __global__ void BoxInterForceGPU( double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, - double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, @@ -577,7 +577,7 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); @@ -608,7 +608,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, @@ -708,7 +708,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box], + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB); } @@ -868,12 +868,12 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - int index, double gpu_sigmaSq, + double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -886,20 +886,20 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -909,13 +909,13 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { @@ -928,20 +928,20 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -950,13 +950,13 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,20 +968,20 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } else { return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha); + CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha) { + int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = erfc(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -990,12 +990,12 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } @@ -1009,11 +1009,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } else { return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } @@ -1021,13 +1021,14 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { @@ -1053,7 +1054,7 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, @@ -1061,7 +1062,7 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } if (sc_coul) { @@ -1075,21 +1076,21 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * CalcCoulombVirSwitchGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, - gpu_rCut); + gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_rCut); + gpu_alpha, gpu_alphaSq, gpu_rCut); } } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_rCut) { + double gpu_alphaSq, double gpu_rCut) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) double constValue = gpu_alpha * M_2_SQRTPI; - double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq); + double expConstValue = exp(-1.0 * gpu_alphaSq * distSq); double temp = 1.0 - erf(gpu_alpha * dist); return qi_qj * (temp / dist + constValue * expConstValue) / distSq; } else { diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 29d7953db..072939c24 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CALCULATE_FORCE_CUDA_KERNEL -#define CALCULATE_FORCE_CUDA_KERNEL +#ifndef CALCULATE_FORCE_CUDA_KERNEL_H +#define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -114,6 +114,7 @@ __global__ void BoxForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, @@ -183,6 +184,7 @@ __global__ void BoxInterForceGPU(int *gpu_cellStartIndex, double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, @@ -249,32 +251,32 @@ __device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, //ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha); + int gpu_ewald, double gpu_alpha, double gpu_alphaSq); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, @@ -286,18 +288,18 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut, int index, double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, + int gpu_ewald, double gpu_alpha, double gpu_alphaSq, double gpu_rCut); //VDW Calculation @@ -359,7 +361,7 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald, int gpu_isMartini, - double gpu_alpha, + double gpu_alpha, double gpu_alphaSq, double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq, @@ -377,25 +379,25 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, int index = FlatIndexGPU(kind1, kind2, gpu_count); if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index, + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_lambdaCoulomb); @@ -403,4 +405,4 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, #endif /*GOMC_CUDA*/ -#endif /*CALCULATE_FORCE_CUDA_KERNEL*/ +#endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu index eda129319..e32aa3098 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cu +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu @@ -31,9 +31,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, - int isMartini, int count, double Rcut, - double const *rCutCoulomb, double RcutLow, double Ron, - double const *alpha, int ewald, double diElectric_1) { + int isMartini, int count, double Rcut, double RcutSq, + double const *rCutCoulomb, double const *rCutCoulombSq, + double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1) { int countSq = count * count; CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double)); CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double)); @@ -42,14 +43,17 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, CUMALLOC((void **)&vars.gpu_isMartini, sizeof(int)); CUMALLOC((void **)&vars.gpu_count, sizeof(int)); CUMALLOC((void **)&vars.gpu_rCut, sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutSq, sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutCoulomb, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_rCutCoulombSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_rCutLow, sizeof(double)); CUMALLOC((void **)&vars.gpu_rOn, sizeof(double)); CUMALLOC((void **)&vars.gpu_alpha, BOX_TOTAL * sizeof(double)); + CUMALLOC((void **)&vars.gpu_alphaSq, BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_ewald, sizeof(int)); CUMALLOC((void **)&vars.gpu_diElectric_1, sizeof(double)); - // allocate gpu memory for lambda variables + // allocate GPU memory for lambda variables CUMALLOC((void **)&vars.gpu_molIndex, (int)BOX_TOTAL * sizeof(int)); CUMALLOC((void **)&vars.gpu_lambdaVDW, (int)BOX_TOTAL * sizeof(double)); CUMALLOC((void **)&vars.gpu_lambdaCoulomb, (int)BOX_TOTAL * sizeof(double)); @@ -65,13 +69,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_count, &count, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCut, &Rcut, sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutSq, &RcutSq, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutCoulomb, rCutCoulomb, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_rCutCoulombSq, rCutCoulombSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rCutLow, &RcutLow, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_rOn, &Ron, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_alpha, alpha, BOX_TOTAL * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(vars.gpu_alphaSq, alphaSq, BOX_TOTAL * sizeof(double), + cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double), cudaMemcpyHostToDevice); diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 68eafb71b..3584e46c2 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt along with this program, also can be found at . ********************************************************************************/ -#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL -#define CONSTANT_DEFINITIONS_CUDA_KERNEL +#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H +#define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA #include @@ -25,9 +25,9 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW, void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq, double const *epsilon_Cn, double const *n, int VDW_Kind, int isMartini, int count, - double Rcut, double const *rCutCoulomb, - double RcutLow, double Ron, double const *alpha, - int ewald, double diElectric_1); + double Rcut, double RcutSq, double const *rCutCoulomb, + double const *rCutCoulombSq, double RcutLow, double Ron, double const *alpha, + double const *alphaSq, int ewald, double diElectric_1); void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber, uint maxAtomsInMol, uint maxMolNumber); void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal); @@ -47,4 +47,4 @@ void DestroyExp6CUDAVars(VariablesCUDA *vars); void DestroyCUDAVars(VariablesCUDA *vars); #endif /*GOMC_CUDA*/ -#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL*/ +#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL_H*/ diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh index 687b6e2c6..00b63e272 100644 --- a/src/GPU/VariablesCUDA.cuh +++ b/src/GPU/VariablesCUDA.cuh @@ -65,10 +65,13 @@ public: gpu_isMartini = NULL; gpu_count = NULL; gpu_rCut = NULL; + gpu_rCutSq = NULL; gpu_rCutLow = NULL; gpu_rOn = NULL; gpu_alpha = NULL; + gpu_alphaSq = NULL; gpu_rCutCoulomb = NULL; + gpu_rCutCoulombSq = NULL; gpu_ewald = NULL; gpu_diElectric_1 = NULL; gpu_aForcex = NULL; @@ -92,11 +95,11 @@ public: int *gpu_isMartini; int *gpu_count; int *gpu_startAtomIdx; //start atom index of the molecule - double *gpu_rCut; - double *gpu_rCutCoulomb; + double *gpu_rCut, *gpu_rCutSq; + double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow; double *gpu_rOn; - double *gpu_alpha; + double *gpu_alpha, *gpu_alphaSq; int *gpu_ewald; double *gpu_diElectric_1; double *gpu_x, *gpu_y, *gpu_z; From b0b0b3f1234016bdaefbcc61378eb8fe66e39763 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Thu, 12 Dec 2024 21:56:49 -0500 Subject: [PATCH 13/16] Reformat per clang formatting rules --- src/BlockOutput.h | 8 +- src/ConfigSetup.cpp | 59 +-- src/GPU/CUDAMemoryManager.cuh | 19 +- src/GPU/CalculateEnergyCUDAKernel.cuh | 238 ++++------ src/GPU/CalculateEwaldCUDAKernel.cu | 20 +- src/GPU/CalculateEwaldCUDAKernel.cuh | 215 +++------ src/GPU/CalculateForceCUDAKernel.cu | 185 ++++---- src/GPU/CalculateForceCUDAKernel.cuh | 521 ++++++++-------------- src/GPU/ConstantDefinitionsCUDAKernel.cuh | 19 +- src/GPU/VariablesCUDA.cuh | 62 +-- 10 files changed, 544 insertions(+), 802 deletions(-) diff --git a/src/BlockOutput.h b/src/BlockOutput.h index 302a427a3..047bcb226 100644 --- a/src/BlockOutput.h +++ b/src/BlockOutput.h @@ -111,13 +111,13 @@ struct BlockAverages : OutputableBase { stepsPerOut = event.frequency; invSteps = 1.0 / stepsPerOut; firstInvSteps = invSteps; - //Handle the case where we are restarting from a checkpoint and the first - //interval is smaller than expected because we create a checkpoint more - //often than the Block output frequency. + // Handle the case where we are restarting from a checkpoint and the first + // interval is smaller than expected because we create a checkpoint more + // often than the Block output frequency. if (startStep != 0 && (startStep % stepsPerOut) != 0) { ulong diff; diff = stepsPerOut - (startStep % stepsPerOut); - firstInvSteps = 1.0/diff; + firstInvSteps = 1.0 / diff; } enableOut = event.enable; } diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp index 5e087604d..7cd429c54 100644 --- a/src/ConfigSetup.cpp +++ b/src/ConfigSetup.cpp @@ -1897,7 +1897,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction cannot be " << "used with LJ long-range corrections." << std::endl; exit(EXIT_FAILURE); - } if (((sys.ff.VDW_KIND == sys.ff.VDW_SHIFT_KIND) || (sys.ff.VDW_KIND == sys.ff.VDW_SWITCH_KIND)) && @@ -1905,7 +1904,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction is not supported " << "for SWITCH or SHIFT potentials." << std::endl; exit(EXIT_FAILURE); - } if (sys.ff.doImpulsePressureCorr && sys.ff.doTailCorr) { std::cout << "ERROR: Both LRC (Long Range Correction) and " @@ -2104,9 +2102,10 @@ void ConfigSetup::verifyInputs(void) { if (in.restart.restartFromBinaryCoorFile) { for (i = 0; i < BOX_TOTAL; i++) { if (!in.files.binaryCoorInput.defined[i]) { - std::cout << "ERROR: Binary coordinate file was not specified for box " - "number " - << i << "!" << std::endl; + std::cout + << "ERROR: Binary coordinate file was not specified for box " + "number " + << i << "!" << std::endl; exit(EXIT_FAILURE); } } @@ -2174,25 +2173,30 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.MEMC1 && sys.memcVal.MEMC2) || (sys.memcVal.MEMC1 && sys.memcVal.MEMC3) || (sys.memcVal.MEMC2 && sys.memcVal.MEMC3)) { - std::cout << "ERROR: Multiple MEMC methods were specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple MEMC methods were specified, but only one " + "is allowed!\n"; exit(EXIT_FAILURE); } if ((sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC2) || (sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC3) || (sys.intraMemcVal.MEMC2 && sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only " + "one is allowed!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readVol || !sys.intraMemcVal.readVol) { - std::cout << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readRatio || !sys.intraMemcVal.readRatio) { - std::cout << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.exchangeRatio.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.exchangeRatio.size() << " exchange ratio was specified!\n"; @@ -2209,49 +2213,52 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.largeKind.size() != sys.memcVal.smallKind.size()) || (sys.intraMemcVal.largeKind.size() != sys.intraMemcVal.smallKind.size())) { - std::cout - << "ERROR: In the MEMC method, the specified number of Large Kinds is not " - << " equal as specified number of Small Kinds!\n"; + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds is not " + << " equal as specified number of Small Kinds!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readLargeBB || !sys.intraMemcVal.readLargeBB) { - std::cout - << "ERROR: In the MEMC method, Large Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC method, Large Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.largeBBAtom1.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.largeBBAtom1.size() << " sets of Large Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && !sys.memcVal.readSmallBB) { - std::cout - << "ERROR: In the MEMC-2 method, Small Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && (sys.memcVal.smallKind.size() != sys.memcVal.smallBBAtom1.size())) { - std::cout - << "ERROR: In the MEMC-2 method, the specified number of Small Kinds was " - << sys.memcVal.smallKind.size() << ", but " - << sys.memcVal.smallBBAtom1.size() - << " sets of Small Molecule BackBone was specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, the specified number of Small " + "Kinds was " + << sys.memcVal.smallKind.size() << ", but " + << sys.memcVal.smallBBAtom1.size() + << " sets of Small Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.intraMemcVal.MEMC2 && !sys.intraMemcVal.readSmallBB) { - std::cout << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " - "specified!\n"; + std::cout + << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.enable && sys.intraMemcVal.enable) { if ((sys.memcVal.MEMC1 && !sys.intraMemcVal.MEMC1) || (sys.memcVal.MEMC2 && !sys.intraMemcVal.MEMC2) || (sys.memcVal.MEMC3 && !sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: The selected intra-MEMC method was not same as the inter-MEMC method!\n"; + std::cout << "ERROR: The selected intra-MEMC method was not same as " + "the inter-MEMC method!\n"; exit(EXIT_FAILURE); } } diff --git a/src/GPU/CUDAMemoryManager.cuh b/src/GPU/CUDAMemoryManager.cuh index 0b5b52dcf..06e476a9d 100644 --- a/src/GPU/CUDAMemoryManager.cuh +++ b/src/GPU/CUDAMemoryManager.cuh @@ -2,28 +2,31 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA #include #include -#include #include +#include -#define CUMALLOC(address,size) CUDAMemoryManager::mallocMemory(address,size,#address) -#define CUFREE(address) CUDAMemoryManager::freeMemory(address,#address) +#define CUMALLOC(address, size) \ + CUDAMemoryManager::mallocMemory(address, size, #address) +#define CUFREE(address) CUDAMemoryManager::freeMemory(address, #address) -class CUDAMemoryManager -{ +class CUDAMemoryManager { public: - static cudaError_t mallocMemory(void **address, unsigned int size, std::string var_name); + static cudaError_t mallocMemory(void **address, unsigned int size, + std::string var_name); static cudaError_t freeMemory(void *address, std::string var_name); static bool isFreed(); private: static long long totalAllocatedBytes; - static std::unordered_map > allocatedPointers; + static std::unordered_map> + allocatedPointers; }; #endif diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh index 9f1366400..b0367cc3c 100644 --- a/src/GPU/CalculateEnergyCUDAKernel.cuh +++ b/src/GPU/CalculateEnergyCUDAKernel.cuh @@ -2,93 +2,54 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA +#include "BoxDimensions.h" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include #include -#include "XYZArray.h" -#include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -void CallBoxInterGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, - XYZArray const &coords, - BoxDimensions const &boxAxes, + const std::vector> &neighborList, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); - -__global__ void BoxInterGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); + const std::vector &particleMol, double &REn, + double &LJEn, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); +__global__ void +BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, double *gpu_x, double *gpu_y, double *gpu_z, + double3 axis, double3 halfAx, bool electrostatic, + double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex, + double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, + bool *gpu_isFraction, int box); -__device__ double CalcCoulombGPU(double distSq, int kind1, int kind2, - double qi_qj_fact, double gpu_rCutLow, - int gpu_ewald, int gpu_VDW_Kind, - double gpu_alpha, double gpu_rCutCoulomb, - int gpu_isMartini, double gpu_diElectric_1, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq, - int gpu_count); +__device__ double +CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact, + double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind, + double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini, + double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, + double *gpu_sigmaSq, int gpu_count); __device__ double CalcCoulombVirGPU(double distSq, double qi_qj, double gpu_rCutCoulomb, double gpu_alpha, int gpu_VDW_Kind, int gpu_ewald, @@ -102,78 +63,74 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2, double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// -__device__ double CalcCoulombParticleGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); +__device__ double CalcCoulombParticleGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombParticleGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha); -__device__ double CalcCoulombShiftGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); + double qi_qj_fact, + int gpu_ewald, + double gpu_alpha); +__device__ double CalcCoulombShiftGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_rCut, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombShiftGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut); -__device__ double CalcCoulombExp6GPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); + int gpu_ewald, double gpu_alpha, + double gpu_rCut); +__device__ double CalcCoulombExp6GPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double *gpu_sigmaSq); __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha); -__device__ double CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1, - double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); -__device__ double CalcCoulombSwitchMartiniGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1); -__device__ double CalcCoulombSwitchGPU(double distSq, int index, double qi_qj_fact, - double gpu_alpha, int gpu_ewald, - double gpu_rCut, + int gpu_ewald, double gpu_alpha); +__device__ double +CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, double gpu_rCut, + double gpu_diElectric_1, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); +__device__ double +CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, + double gpu_rCut, double gpu_diElectric_1); +__device__ double CalcCoulombSwitchGPU(double distSq, int index, + double qi_qj_fact, double gpu_alpha, + int gpu_ewald, double gpu_rCut, double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombSwitchGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, double gpu_rCut); - + int gpu_ewald, double gpu_alpha, + double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcEnParticleGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); + double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); __device__ double CalcEnParticleGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn); __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnShiftGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, double *gpu_epsilon_Cn, - double gpu_rCut); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut); __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, @@ -181,28 +138,23 @@ __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_rMaxSq, double *gpu_expConst); __device__ double CalcEnExp6GPUNoLambda(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); -__device__ double CalcEnSwitchMartiniGPU(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); -__device__ double CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn); +__device__ double +CalcEnSwitchMartiniGPU(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); +__device__ double +CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 164091427..8c92d9f26 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -558,14 +558,15 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, - int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, - double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, - double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, - int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x, - double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, - double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx, - double axy, double axz, int box, int atomCount) { + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount) { __shared__ double shared_kvector[IMAGES_PER_BLOCK * 3]; int particleID = blockDim.x * blockIdx.x + threadIdx.x; int offset_vector_index = blockIdx.y * IMAGES_PER_BLOCK; @@ -631,7 +632,8 @@ __global__ void BoxForceReciprocalGPU( double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); + intraForce *= + ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 80062586b..638d5662c 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -2,189 +2,112 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CALCULATE_EWALD_CUDA_KERNEL_H #define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include -#include "VariablesCUDA.cuh" #include -#include "XYZArray.h" -void CallBoxForceReciprocalGPU(VariablesCUDA *vars, - XYZArray &atomForceRec, - XYZArray &molForceRec, - const std::vector &particleCharge, - const std::vector &particleMol, - const std::vector &particleHasNoCharge, - const bool *particleUsed, - const std::vector &startMol, - const std::vector &lengthMol, - double constValue, - uint imageSize, - XYZArray const &molCoords, - BoxDimensions const &boxAxes, - int box); +void CallBoxForceReciprocalGPU( + VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec, + const std::vector &particleCharge, + const std::vector &particleMol, + const std::vector &particleHasNoCharge, const bool *particleUsed, + const std::vector &startMol, const std::vector &lengthMol, + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box); -void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, - XYZArray const &coords, - double const *kx, - double const *ky, +void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords, + double const *kx, double const *ky, double const *kz, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double *prefact, - double *hsqr, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double *prefact, double *hsqr, + double &energyRecip, uint box); -void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecip, uint box); -void CallMolReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const &newCoords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecipNew, uint box); -//Calculate reciprocal term for lambdaNew and Old with same coordinates +// Calculate reciprocal term for lambdaNew and Old with same coordinates void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - const double lambdaCoef, - uint box); + uint imageSize, double *sumRnew, + double *sumInew, double &energyRecipNew, + const double lambdaCoef, uint box); -void CallSwapReciprocalGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - const bool insert, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + const bool insert, double &energyRecipNew, uint box); -void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, - uint imageSize, - double *sumRnew, - double *sumInew, - uint box); +void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, + double *sumRnew, double *sumInew, uint box); -__global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, - double *gpu_aForceRecy, - double *gpu_aForceRecz, - double *gpu_mForceRecx, - double *gpu_mForceRecy, - double *gpu_mForceRecz, - double *gpu_particleCharge, - int *gpu_particleMol, - bool *gpu_particleHasNoCharge, - bool *gpu_particleUsed, - int *gpu_startMol, - int *gpu_lengthMol, - double *gpu_alpha, - double *gpu_alphaSq, - double constValue, - int imageSize, - double *gpu_kx, - double *gpu_ky, - double *gpu_kz, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - bool *gpu_isFraction, - int *gpu_molIndex, - double *gpu_lambdaCoulomb, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - double axx, - double axy, - double axz, - int box, - int atomCount); +__global__ void BoxForceReciprocalGPU( + double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz, + double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, + double *gpu_particleCharge, int *gpu_particleMol, + bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount); -__global__ void BoxReciprocalSumsGPU(double * gpu_x, - double * gpu_y, - double * gpu_z, - double * gpu_kx, - double * gpu_ky, - double * gpu_kz, - int atomNumber, - double * gpu_particleCharge, - double * gpu_sumRnew, - double * gpu_sumInew, +__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y, + double *gpu_z, double *gpu_kx, + double *gpu_ky, double *gpu_kz, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, int imageSize); __global__ void MolReciprocalGPU(double *gpu_cx, double *gpu_cy, double *gpu_cz, double *gpu_nx, double *gpu_ny, double *gpu_nz, double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_prefactRef, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_energyRecipNew, int imageSize); -__global__ void ChangeLambdaMolReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - double *gpu_energyRecipNew, - double lambdaCoef, - int imageSize); +__global__ void ChangeLambdaMolReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, double *gpu_sumRref, + double *gpu_sumIref, double *gpu_prefactRef, double *gpu_energyRecipNew, + double lambdaCoef, int imageSize); __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, + double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - const bool insert, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, + double *gpu_prefactRef, const bool insert, + double *gpu_energyRecipNew, int imageSize); -__global__ void BoxReciprocalGPU(double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_energyRecip, +__global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew, + double *gpu_sumInew, double *gpu_energyRecip, int imageSize); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index 219a6cafb..79360471a 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,13 +125,14 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], - vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], - vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, - sc_coul, sc_sigma_6, sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, - vars->gpu_expConst, vars->gpu_molIndex, vars->gpu_lambdaVDW, - vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box); + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], + vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_Invcell_z[box], vars->gpu_nonOrth, sc_coul, sc_sigma_6, + sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, + vars->gpu_molIndex, vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, + vars->gpu_isFraction, box); checkLastErrorCUDA(__FILE__, __LINE__); cudaDeviceSynchronize(); // ReduceSum // Virial of LJ @@ -302,10 +303,10 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_aForcex, vars->gpu_aForcey, vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey, vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power, @@ -577,9 +578,9 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], - gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, - lambdaCoulomb, gpu_count[0], kA, kB); + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], + gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, + sc_power, lambdaCoulomb, gpu_count[0], kA, kB); gpu_rT11[threadID] += pRF * (virComponents.x * diff_com.x); gpu_rT22[threadID] += pRF * (virComponents.y * diff_com.y); @@ -599,25 +600,24 @@ __global__ void BoxInterForceGPU( } } -__global__ void -BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, - int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, - double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, - double3 halfAx, bool electrostatic, double *gpu_particleCharge, - int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn, - double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, - int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, - int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, - double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, - double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, - double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez, - double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst, - int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, - bool *gpu_isFraction, int box) { +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) { __shared__ double shr_cutoff; __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs; __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex; @@ -708,9 +708,10 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], - gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, - sc_power, lambdaCoulomb, gpu_count[0], kA, kB); + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], + gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, + sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, + kB); } } @@ -868,12 +869,14 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_alphaSq, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -886,15 +889,18 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirParticleGPU(distSq, qi_qj, + gpu_ewald, gpu_alpha, + gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -909,13 +915,15 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -928,15 +936,17 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -950,13 +960,15 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,15 +980,17 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -990,13 +1004,14 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb) { + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } if (sc_coul) { @@ -1009,21 +1024,20 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + return gpu_lambdaCoulomb * CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, - double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1) { +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -1054,11 +1068,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, @@ -1079,7 +1093,8 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_alphaSq, gpu_rCut); + gpu_alpha, gpu_alphaSq, + gpu_rCut); } } diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 072939c24..87aaca0c8 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -2,407 +2,244 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CALCULATE_FORCE_CUDA_KERNEL_H #define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include "XYZArray.h" #include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -#include "ConstantDefinitionsCUDAKernel.cuh" #include "CalculateMinImageCUDAKernel.cuh" +#include "ConstantDefinitionsCUDAKernel.cuh" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" +#include -void CallBoxForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, + const std::vector> &neighborList, const std::vector &mapParticleToCell, - XYZArray const &coords, - BoxDimensions const &boxAxes, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - double *aForcex, - double *aForcey, - double *aForcez, - double *mForcex, - double *mForcey, - double *mForcez, - int atomCount, - int molCount, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, + const std::vector &particleMol, double &REn, + double &LJEn, double *aForcex, double *aForcey, + double *aForcez, double *mForcex, double *mForcey, + double *mForcez, int atomCount, int molCount, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, uint const box); -void CallBoxInterForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, - const std::vector &cellStartIndex, - const std::vector > &neighborList, - const std::vector &mapParticleToCell, - XYZArray const ¤tCoords, - XYZArray const ¤tCOM, - BoxDimensions const& boxAxes, - bool electrostatic, - const std::vector &particleCharge, - const std::vector &particleKind, - const std::vector &particleMol, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - double &vT11, - double &vT12, - double &vT13, - double &vT22, - double &vT23, - double &vT33, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); +void CallBoxInterForceGPU( + VariablesCUDA *vars, const std::vector &cellVector, + const std::vector &cellStartIndex, + const std::vector> &neighborList, + const std::vector &mapParticleToCell, XYZArray const ¤tCoords, + XYZArray const ¤tCOM, BoxDimensions const &boxAxes, + bool electrostatic, const std::vector &particleCharge, + const std::vector &particleKind, const std::vector &particleMol, + double &rT11, double &rT12, double &rT13, double &rT22, double &rT23, + double &rT33, double &vT11, double &vT12, double &vT13, double &vT22, + double &vT23, double &vT33, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); -void CallVirialReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallVirialReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const ¤tCOMDiff, const std::vector &particleCharge, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - uint imageSize, - double constVal, - uint box); + double &rT11, double &rT12, double &rT13, + double &rT22, double &rT23, double &rT33, + uint imageSize, double constVal, uint box); -__global__ void BoxForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - double *gpu_aForcex, - double *gpu_aForcey, - double *gpu_aForcez, - double *gpu_mForcex, - double *gpu_mForcey, - double *gpu_mForcez, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void BoxInterForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comx, - double *gpu_comy, - double *gpu_comz, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double *gpu_vT11, - double *gpu_vT12, - double *gpu_vT13, - double *gpu_vT22, - double *gpu_vT23, - double *gpu_vT33, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxInterForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comx, + double *gpu_comy, double *gpu_comz, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, + double *gpu_rT22, double *gpu_rT23, double *gpu_rT33, double *gpu_vT11, + double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23, + double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, + double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, + double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, + double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void VirialReciprocalGPU(double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comDx, - double *gpu_comDy, - double *gpu_comDz, - double *gpu_kxRef, - double *gpu_kyRef, - double *gpu_kzRef, - double *gpu_prefactRef, - double *gpu_hsqrRef, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_particleCharge, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double constVal, - uint imageSize, - uint atomNumber); +__global__ void VirialReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comDx, + double *gpu_comDy, double *gpu_comDz, double *gpu_kxRef, double *gpu_kyRef, + double *gpu_kzRef, double *gpu_prefactRef, double *gpu_hsqrRef, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_particleCharge, + double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, double *gpu_rT22, + double *gpu_rT23, double *gpu_rT33, double constVal, uint imageSize, + uint atomNumber); -__device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn, - int gpu_isMartini, - int gpu_VDW_Kind, - int gpu_count, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst); +__device__ double +CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind, + int gpu_count, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double +CalcCoulombVirExp6GPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1, - int index, - double gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double CalcCoulombVirSwitchMartiniGPU( + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb); +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double sc_sigma_6, + double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); -__device__ double CalcVirExp6GPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_rMin, double *gpu_rMaxSq, - double *gpu_expConst, + uint sc_power, double gpu_lambdaVDW); +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn); +__device__ double CalcVirExp6GPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirExp6GPU(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn, + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn); +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn); - +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn); // Have to move the implementation for some functions here // since CUDA doesn't allow __global__ to call __device__ // from different files // Wanted to call CalcCoulombForceGPU() from CalculateEnergyCUDAKernel.cu file -__device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, - int gpu_VDW_Kind, int gpu_ewald, - int gpu_isMartini, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCutCoulomb, - double gpu_diElectric_1, - double *gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb, - int gpu_count, int kind1, - int kind2) -{ - if((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { +__device__ inline double CalcCoulombForceGPU( + double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald, + int gpu_isMartini, double gpu_alpha, double gpu_alphaSq, + double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb, int gpu_count, int kind1, int kind2) { + if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { return 0.0; } int index = FlatIndexGPU(kind1, kind2, gpu_count); - if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, gpu_diElectric_1, - index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + if (gpu_VDW_Kind == GPU_VDW_STD_KIND) { + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { + return CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, + gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCutCoulomb, index, + gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } - #endif /*GOMC_CUDA*/ #endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 3584e46c2..c5cabf281 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -2,17 +2,18 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H #define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include +#include "EnsemblePreprocessor.h" #include "GeomLib.h" #include "VariablesCUDA.cuh" -#include "EnsemblePreprocessor.h" +#include +#include #define GPU_VDW_STD_KIND 0 #define GPU_VDW_SHIFT_KIND 1 @@ -21,12 +22,12 @@ along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA -#include -#include -#include #include "EnsemblePreprocessor.h" #include "NumLib.h" +#include +#include +#include -//Need a separate float constant for device code with the MSVC compiler -//See CUDA Programming Guide section I.4.13 for details +// Need a separate float constant for device code with the MSVC compiler +// See CUDA Programming Guide section I.4.13 for details static const __device__ double qqFactGPU = num::qqFact; -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) -{ +#define gpuErrchk(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, + bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); } } -inline void checkLastErrorCUDA(const char *file, int line) -{ +inline void checkLastErrorCUDA(const char *file, int line) { cudaError_t code = cudaGetLastError(); if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); exit(code); } } -inline void printFreeMemory() -{ - size_t free_byte ; - size_t total_byte ; - cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ; +inline void printFreeMemory() { + size_t free_byte; + size_t total_byte; + cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte); - if ( cudaSuccess != cuda_status ) { + if (cudaSuccess != cuda_status) { printf("Error: cudaMemGetInfo fails, %s \n", - cudaGetErrorString(cuda_status) ); + cudaGetErrorString(cuda_status)); exit(1); } - double free_db = (double)free_byte ; - double total_db = (double)total_byte ; - double used_db = total_db - free_db ; + double free_db = (double)free_byte; + double total_db = (double)total_byte; + double used_db = total_db - free_db; printf("GPU memory usage: used = %f, free = %f MB, total = %f MB\n", - used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, total_db / 1024.0 / 1024.0); + used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, + total_db / 1024.0 / 1024.0); } -class VariablesCUDA -{ +class VariablesCUDA { public: - VariablesCUDA() - { + VariablesCUDA() { gpu_sigmaSq = NULL; gpu_epsilon_Cn = NULL; gpu_n = NULL; @@ -94,7 +96,7 @@ public: int *gpu_VDW_Kind; int *gpu_isMartini; int *gpu_count; - int *gpu_startAtomIdx; //start atom index of the molecule + int *gpu_startAtomIdx; // start atom index of the molecule double *gpu_rCut, *gpu_rCutSq; double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow; From 4bd32799cfcf3ae2e128d5625f92e95241335380 Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Mon, 20 Jan 2025 14:32:29 -0500 Subject: [PATCH 14/16] Recognize newer OpenMP version date-codes --- src/Main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Main.cpp b/src/Main.cpp index 262b151d5..688633590 100644 --- a/src/Main.cpp +++ b/src/Main.cpp @@ -113,12 +113,12 @@ int main(int argc, char *argv[]) { #endif // Print OpenMP version if recognized or OpenMP date code if not recognized. #ifdef _OPENMP - std::unordered_map omp_map{ + std::unordered_map omp_map{ {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0 Preview 1"}, - {201811, "5.0"}}; - std::unordered_map::const_iterator match = - omp_map.find(_OPENMP); + {201811, "5.0"}, {202011, "5.1"}, {202111, "5.2"}, + {202411, "6.0"}}; + auto match = omp_map.find(_OPENMP); if (match == omp_map.end()) printf("%-40s %u\n", "Info: Compiled with OpenMP Version", _OPENMP); else From ec616550d09e1876e587f4859375b2d3017f5dde Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Mon, 20 Jan 2025 14:34:42 -0500 Subject: [PATCH 15/16] Terminate GOMC (with an error message) if the initial configuration has infinite energy, such as two atoms with identical coordinates --- src/Simulation.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Simulation.cpp b/src/Simulation.cpp index b63d525ce..7275eab33 100644 --- a/src/Simulation.cpp +++ b/src/Simulation.cpp @@ -75,6 +75,14 @@ Simulation::~Simulation() { void Simulation::RunSimulation(void) { GOMC_EVENT_START(1, GomcProfileEvent::MC_RUN); double startEnergy = system->potential.totalEnergy.total; + if (!std::isfinite(startEnergy)) { + std::cout + << "Initial system has non-finite energy. This is usually caused" + " by two or more atoms in the initial configuration having " + "identical coordinates. Please correct your input file and rerun.\n"; + + exit(EXIT_FAILURE); + } if (totalSteps == 0) { for (int i = 0; i < (int)frameSteps.size(); i++) { if (i == 0) { From a3138bf669ddb0b148f729cf75e23272b55fd4ac Mon Sep 17 00:00:00 2001 From: Loren Schwiebert Date: Mon, 20 Jan 2025 14:50:34 -0500 Subject: [PATCH 16/16] Reformat updated code and revise comment --- src/Main.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Main.cpp b/src/Main.cpp index 688633590..7b7f9c37e 100644 --- a/src/Main.cpp +++ b/src/Main.cpp @@ -111,13 +111,12 @@ int main(int argc, char *argv[]) { #if defined _OPENMP && _OPENMP < 201511 printf("Warning: OpenMP version < 4.5. GOMC will not run optimally!\n"); #endif - // Print OpenMP version if recognized or OpenMP date code if not recognized. + // Print the OpenMP version if recognized or instead the OpenMP date code. #ifdef _OPENMP std::unordered_map omp_map{ - {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, - {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0 Preview 1"}, - {201811, "5.0"}, {202011, "5.1"}, {202111, "5.2"}, - {202411, "6.0"}}; + {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, + {201511, "4.5"}, {201611, "5.0 Preview 1"}, {201811, "5.0"}, + {202011, "5.1"}, {202111, "5.2"}, {202411, "6.0"}}; auto match = omp_map.find(_OPENMP); if (match == omp_map.end()) printf("%-40s %u\n", "Info: Compiled with OpenMP Version", _OPENMP);