diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e0f63b54..3ec8ee57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(progress prg_chebyshev_mod.F90 prg_densitymatrix_mod.F90 prg_dos_mod.F90 + prg_ewald_mod.F90 prg_extras_mod.F90 prg_genz_mod.F90 prg_graph_mod.F90 @@ -46,6 +47,7 @@ add_library(progress prg_subgraphloop_mod.F90 prg_system_mod.F90 prg_timer_mod.F90 + prg_xlbokernel_mod.F90 prg_xlbo_mod.F90) #prg_xlkernel_mod.F90) @@ -94,6 +96,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_chebyshev_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_densitymatrix_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_dos_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_ewald_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_extras_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_genz_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_graph_mod.mod @@ -122,6 +125,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_system_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_timer_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbo_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbokernel_mod.mod # ${CMAKE_CURRENT_BINARY_DIR}/prg_xlkernel_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/slaterkosterforce_latte_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/tbparams_latte_mod.mod diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 new file mode 100644 index 00000000..5dbb24ea --- /dev/null +++ b/src/prg_ewald_mod.F90 @@ -0,0 +1,1222 @@ +! Ewald sum routines for kernel calculation +module prg_ewald_mod + + use bml + use prg_timer_mod + use prg_parallel_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: Ewald_Real_Space_Single + public :: Ewald_Real_Space_Single_latte + public :: Ewald_Real_Space + public :: Ewald_Real_Space_latte + public :: Ewald_Real_Space_Test + public :: Ewald_Real_Space_Matrix_latte + public :: Ewald_k_space_latte_single + public :: Ewald_k_space_latte + public :: Ewald_k_space_Test + public :: Ewald_k_space_Matrix_latte + +contains + + !> Find Coulomb potential on site I from single charge at site J + subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & + DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) + real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RXYZ(1,J)+k*box(1,1) + Rb(2) = RXYZ(2,J)+m*box(2,2) + Rb(3) = RXYZ(3,J)+l*box(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + endif + enddo + enddo + enddo + + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single_latte + + subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) + + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J + real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RX(J)+k*Lbox(1) + Rb(2) = RY(J)+m*Lbox(2) + Rb(3) = RZ(J)+l*Lbox(3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + enddo + enddo + + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single + + subroutine Ewald_Real_Space_Matrix_latte(E,RXYZ,Box,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, Nr_Elem + real(PREC), intent(in) :: COULACC + real(PREC), intent(out) :: E(Nr_atoms,Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: I,J,K, ccnt, newj, PBCI,PBCJ,PBCK + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + E = 0.0 + + !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(E,U,Element_Pointer,RXYZ,totnebcoul,nebcoul,coulcut,calpha) + + do I = 1,Nr_atoms + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + E(I,J) = E(I,J) + CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + E(I,J) = E(I,J) - EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + E(I,J) = E(I,J) - ((EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo + enddo + !$OMP END PARALLEL DO + + E = KECONST*E + + end subroutine Ewald_Real_Space_Matrix_latte + + subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & + DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_latte + + subroutine Ewald_Real_Space_Test(COULOMBV,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,nnRx,nnRy,nnRz,nrnnlist,nnType,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nnI,J,Rb,Rab,dR,MAGR,MAGR2,TJ,DC,Z,NUMREP_ERFC,CA) & + ! !$OMP REDUCTION(+:COULOMBV) + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo + ! !$OMP END PARALLEL DO + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Test + + subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space + + subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo + enddo + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte + + subroutine Ewald_k_Space_Matrix_latte(E,RXYZ,Box,Nr_atoms,COULACC,Max_Nr_Neigh,nebcoul,totnebcoul) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh,nebcoul(4,Max_Nr_Neigh,Nr_atoms),totnebcoul(Nr_atoms) + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: E(Nr_atoms,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, newj, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + !COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + E = 0.0 + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + ! COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + ! SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,J) + do I = 1,Nr_atoms + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + E(I,J) = E(I,J) + KEPREF*(COSLIST(I)*COSLIST(J)+SINLIST(I)*SINLIST(J)) + ! COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + E(I,I) = E(I,I) - CORRFACT + enddo + !$OMP END PARALLEL DO + + !KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo + enddo + + ! Point self energy + !CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + !COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_Matrix_latte + + subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULACC) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, J + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2 + real(PREC) :: CORRFACT,FOURCALPHA2 + real(PREC) :: RECIPVECS(3,3) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: IDOT, JDOT, COSJDOT, SINJDOT, KEPREF + + integer :: I,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,IDOT,JDOT,COSJDOT,SINJDOT,L,M,N,MMIN,MMAX,NMIN,NMAX,L11,M22,K,K2,PREFACTOR,KEPREF) + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M22 = M*RECIPVECS(2,2) + + do N = NMIN,NMAX + K(1) = L11 + K(2) = M22 + K(3) = N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + KEPREF = KECONST*PREFACTOR + JDOT = K(1)*RXYZ(1,J) + K(2)*RXYZ(2,J) + K(3)*RXYZ(3,J) + SINJDOT = sin(JDOT) + COSJDOT = cos(JDOT) + do I = 1,Nr_atoms + IDOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + COULOMBV(I) = COULOMBV(I) + KEPREF*DELTAQ(J)*(COSJDOT*cos(IDOT)+SINJDOT*sin(IDOT)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte_single + + subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + ! + implicit none + ! + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + ! + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/LBox(1) + RECIPVECS(2,2) = TWO*pi/LBox(2) + RECIPVECS(3,3) = TWO*pi/LBox(3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + ! + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo + enddo + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_Test + +end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 5c30947c..633ea1c8 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -5,11 +5,13 @@ !! module prg_implicit_fermi_mod + use omp_lib use bml use prg_normalize_mod use prg_densitymatrix_mod use prg_timer_mod use prg_parallel_mod + use prg_ewald_mod implicit none @@ -18,13 +20,173 @@ module prg_implicit_fermi_mod integer, parameter :: dp = kind(1.0d0) public :: prg_implicit_fermi + public :: prg_implicit_fermi_save_inverse public :: prg_implicit_fermi_zero public :: prg_test_density_matrix public :: prg_implicit_fermi_response + public :: prg_implicit_fermi_first_order_response public :: prg_finite_diff contains + !> Recursive Implicit Fermi Dirac for finite temperature. + !! \param Inv_bml Inverses generated by algorithm. + !! \param h_bml Input Hamiltonian matrix. + !! \param p_bml Output density matrix. + !! \param nsteps Number of recursion steps. + !! \param nocc Number of occupied states. + !! \param mu Shifted chemical potential + !! \param beta Input inverse temperature. + !! \param occErrLimit Occupation error limit. + !! \param threshold Threshold for multiplication. + !! \param tol Tolerance for linear system solver. + !! \param SCF_IT The current SCF iteration. + !! \param occiter Counts the total nr of DM calculations during MD. + !! See \cite{niklasson2003} + subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, & + mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter) + + implicit none + + type(bml_matrix_t), intent(in) :: h_bml + type(bml_matrix_t), intent(inout) :: p_bml, Inv_bml(nsteps) + integer, intent(in) :: nsteps, SCF_IT + real(dp), intent(in) :: nocc, threshold + real(dp), intent(in) :: tol + real(dp), intent(in) :: occErrLimit, beta + real(dp), intent(inout) :: mu + integer, intent(inout) :: occiter + + type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml + real(dp) :: trdPdmu, trP0, occErr, alpha, newerr + real(dp) :: cnst, ofactor, mustep, preverr + real(dp), allocatable :: trace(:), gbnd(:) + character(20) :: bml_type + integer :: N, M, i, iter, muadj, prev, maxiter + + bml_type = bml_get_type(h_bml) + N = bml_get_N(h_bml) + M = bml_get_M(h_bml) + + allocate(trace(2)) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) + + occErr = 10.0_dp + newerr = 1000_dp + preverr = 1000_dp + alpha = 1.0_dp + prev = 1 + iter = 0 + maxiter = 30 + cnst = beta/(1.0_dp*2**(nsteps+2)) + + if (SCF_IT .eq. 1) then + alpha = 4.0_dp + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + ! Generate good starting guess for (2*(P2-P)+1)^-1 using conjugate gradient + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + II + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + do i = 1, nsteps + call bml_copy(I_bml, Inv_bml(i)) + enddo + end if + + do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < maxiter) + iter = iter + 1 + muadj = 0 + write(*,*) 'mu =', mu + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + + do i = 1, nsteps + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + I + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + ! Find inverse ai = (2*(P2-P)+I)^-1 + !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, w_bml, d_bml, aux_bml, tol, threshold) + !call bml_copy(Inv_bml(i),ai_bml) + if (iter .eq. 1) then + call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + endif + call prg_newtonschulz(y_bml, Inv_bml(i), d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(Inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + !call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + enddo + + trP0 = bml_trace(p_bml) + trdPdmu = beta*(trP0 - bml_sum_squares(p_bml)) ! sum p(i,j)**2 + occErr = abs(trP0 - nocc) + write(*,*) 'occerr =', occErr + + ! If occupation error is too large, do bisection method + if (occerr > 1.0_dp) then + if (nocc-trP0 < 0.0_dp .and. prev .eq. -1) then + prev = -1 + else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then + prev = 1 + else if (nocc-trP0 > 0.0_dp .and. prev .eq. -1) then + prev = 1 + alpha = alpha/2 + else + prev = -1 + alpha = alpha/2 + endif + mu = mu + prev*alpha + muadj = 1 + + ! Otherwise do Newton + else if (occErr .gt. occErrLimit) then + mustep = (nocc -trP0)/trdPdmu + mu = mu + mustep + muadj = 1 + preverr = occErr + end if + enddo + + if (iter .ge. maxiter) then + write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' + end if + ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. + ! For now we recompute the DM one extra time if mu was adjusted. + !if (muadj .eq. 1) then + ! Adjust occupation + ! call bml_copy(p_bml, d_bml) + ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) + ! ofactor = ((nocc - trP0)/trdPdmu) * beta + ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !end if + occiter = occiter + iter + call bml_scale(2.0_dp,p_bml) + deallocate(trace) + + call bml_deallocate(p2_bml) + call bml_deallocate(w_bml) + call bml_deallocate(d_bml) + call bml_deallocate(y_bml) + call bml_deallocate(aux_bml) + call bml_deallocate(ai_bml) + call bml_deallocate(I_bml) + + end subroutine prg_implicit_fermi_save_inverse + !> Recursive Implicit Fermi Dirac for finite temperature. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. @@ -68,7 +230,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux1_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux2_bml) endif @@ -106,7 +268,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & else call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, aux1_bml, w_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -123,9 +285,9 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux1_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux1_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo @@ -134,8 +296,10 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if write(*,*) "mu =", mu enddo @@ -146,7 +310,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("P adjusted occupation",p_bml,0,10,0,10) deallocate(trace) @@ -155,7 +319,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_deallocate(aux1_bml) call bml_deallocate(aux2_bml) endif @@ -185,7 +349,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, real(dp), intent(in) :: mu, threshold real(dp), intent(inout), optional :: tol - type(bml_matrix_t) :: w_bml, y_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml + type(bml_matrix_t) :: w_bml, y_bml, c_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml real(dp) :: cnst real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type @@ -201,6 +365,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, c_bml) if (method .eq. 1) then call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) @@ -221,7 +386,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_copy(p2_bml, y_bml) call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, c_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -232,9 +397,9 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, c_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, c_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo endif @@ -246,6 +411,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) + call bml_deallocate(c_bml) if (method .eq. 1) then call bml_deallocate(ai_bml) call bml_deallocate(I_bml) @@ -253,6 +419,111 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, end subroutine prg_implicit_fermi_zero + + !> Calculate first order density matrix response to perturbations using Implicit Fermi Dirac. + !! \param H0_bml Input Hamiltonian matrix. + !! \param H1_bml Input First order perturbation of H0. + !! \param P0_bml Output density matrix. + !! \param P1_bml Output First order density matrix response. + !! \param nsteps Number of recursion steps. + !! \param mu0 Shifted chemical potential. + !! \param beta Input inverse temperature. + !! \param nocc Number of occupied states. + !! \param threshold Threshold for matrix algebra. + !! See \cite{niklasson2015} + subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bml, & + Inv_bml, nsteps, mu0, beta, nocc, threshold) + + implicit none + + type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, Inv_bml(nsteps) + type(bml_matrix_t), intent(inout) :: P0_bml,P1_bml + real(dp), intent(in) :: mu0, threshold + real(dp) :: mu1 + real(dp), intent(in) :: beta, nocc + integer, intent(in) :: nsteps + type(bml_matrix_t) :: B0_bml, B_bml, C_bml, C0_bml + character(20) :: bml_type + real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst + integer :: N, M, i, j, k + + bml_type = bml_get_type(H0_bml) + N = bml_get_N(H0_bml) + M = bml_get_M(H0_bml) + + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B0_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) + + cnst = beta/(2**(2+nsteps)) + + ! P0 = 0.5*II - cnst*(H0-mu0*II) + call bml_copy(H0_bml, P0_bml) + call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) + + ! P1 = - cnst*H1 + call bml_copy(H1_bml, P1_bml) + call bml_scale(-1.0_dp*cnst, P1_bml) + do i = 1, nsteps + + ! Calculate coefficient matrices + ! C0 = P0^2 + call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! C = P0*P1+P1*P0, B = 2(P1 - C) + call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_copy(P1_bml, B_bml) + call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P0 + call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) + ! Get next P1 + ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) + call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + enddo + + ! do i = 1, nsteps-1 + ! D = A^-1*P0 + ! call bml_multiply(Inv_bml(i), B0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! call bml_multiply(C0_bml, B0_bml, B_bml, 1.0_dp, 0.0_dp, threshold) + ! B0 = A^-1*P0^2 + ! call bml_copy(B_bml,B0_bml) + ! B = I + D -P0*D + ! call bml_add(B_bml, C0_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_scale_add_identity(B_bml, 1.0_dp, 1.0_dp, threshold) + ! P1 = 2D*P1(I+D-P0*D) + ! call bml_multiply(C0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + ! call bml_multiply(C_bml, B_bml, P1_bml, 2.0_dp, 0.0_dp, threshold) + ! enddo + ! call bml_multiply(B0_bml, P1_bml, C_bml, 2.0_dp, 0.0_dp, threshold) + ! call bml_copy(P1_bml, B_bml) + ! call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P1 + ! call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + + + ! dPdmu = beta*P0(I-P0) + !call bml_copy(P0_bml, B_bml) + !call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + !call bml_multiply(P0_bml, B_bml, C_bml, beta, 0.0_dp, threshold) + !dPdmu_trace = bml_trace(C_bml) + !p1_trace = bml_trace(P1_bml) + !mu1 = - p1_trace/dPdmu_trace + if (abs(dPdmu_trace) > 1e-8) then + !call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + endif + + call bml_deallocate(B_bml) + call bml_deallocate(B0_bml) + call bml_deallocate(C_bml) + call bml_deallocate(C0_bml) + + + end subroutine prg_implicit_fermi_first_order_response + + !> Calculate density matrix response to perturbations using Implicit Fermi Dirac. !! \param H0_bml Input Hamiltonian matrix. !! \param H1_bml, H2_bml, H3_bml Input First to third order perturbations of H0. @@ -279,7 +550,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P real(dp), allocatable, intent(inout) :: mu(:) real(dp), intent(in) :: beta, occ_tol, lin_tol, nocc integer, intent(in) :: nsteps - type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, C0_bml, T_bml, Ti_bml + type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, tmp3_bml, C0_bml, T_bml, Ti_bml type(bml_matrix_t), allocatable :: B_bml(:), P_bml(:), C_bml(:), H_bml(:) real(dp), allocatable :: p_trace(:), trace(:) character(20) :: bml_type @@ -305,6 +576,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P end do call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp1_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp3_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, T_bml) @@ -371,10 +643,10 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P call bml_scale_add_identity(T_bml, 2.0_dp, 1.0_dp, threshold) ! Find T-inverse if (i .eq. 1) then - call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, 0.01_dp, threshold) + call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, tmp3_bml,0.01_dp, threshold) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) end if - call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, lin_tol, threshold) + call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, tmp3_bml, I_bml, lin_tol, threshold) ! Get next P0 call bml_multiply(Ti_bml, C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) ! Get next P1 @@ -608,31 +880,38 @@ end subroutine prg_setup_linsys !! \param tol Convergence criterion (Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, tol, threshold) + subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, threshold) implicit none - type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml - type(bml_matrix_t), intent(in) :: a_bml + type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml, d_bml + type(bml_matrix_t), intent(in) :: a_bml, I_bml real(dp), intent(in) :: threshold, tol - real(dp) :: norm - integer :: i + real(dp) :: err,prev_err,scaled_tol + integer :: i,N,N2 - norm = 1.0 + N = bml_get_N(a_bml) + err = 100000.0 i = 0 - do while(norm > tol) + do while(err > tol) + !write(*,*) 'iter = ', i + !write(*,*) 'ns error =', err call bml_copy(ai_bml, tmp_bml) call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold) call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold) - norm = bml_fnorm(r_bml) - ! write(*,*) "norm = ", norm - if (norm < tol) then - exit - end if - call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + prev_err = err + err = bml_fnorm(r_bml) + !write(*,*) "err = ", err + !write(*,*) "prev_err = ", prev_err + if (10*prev_err < err) then + write(*,*) 'NS did not converge, calling conjugate gradient' + call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.00001_dp, threshold) + else + call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + endif i = i + 1 enddo - ! write(*,*) "Number of NS iterations:", i + !write(*,*) "Number of NS iterations:", i end subroutine prg_newtonschulz ! Preconditioned CG, preconditioner inverse diagonal of A @@ -717,47 +996,47 @@ end subroutine prg_pcg !! \param w_bml Auxillary matrix !! \param cg_tol Convergence condition (OBS squared Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_conjgrad(A_bml, p_bml, p2_bml, d_bml, w_bml, cg_tol, threshold) + subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, threshold) implicit none - type(bml_matrix_t), intent(in) :: A_bml - type(bml_matrix_t), intent(inout) :: p_bml, p2_bml, d_bml, w_bml + type(bml_matrix_t), intent(in) :: A_bml, p2_bml + type(bml_matrix_t), intent(inout) :: p_bml, tmp_bml, d_bml, w_bml real(dp), intent(in) :: cg_tol, threshold real(dp) :: alpha, beta integer :: k real(dp) :: r_norm_old, r_norm_new - call bml_multiply(A_bml, p_bml, p2_bml, -1.0_dp, 1.0_dp, threshold) - r_norm_new = bml_sum_squares(p2_bml) + call bml_copy(p2_bml,tmp_bml) + call bml_multiply(A_bml, p_bml, tmp_bml, -1.0_dp, 1.0_dp, threshold) + r_norm_new = bml_sum_squares(tmp_bml) k = 0 do while (r_norm_new .gt. cg_tol) - ! write(*,*) r_norm_new + ! write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then - write(*,*) r_norm_new - call bml_copy(p2_bml, d_bml) + call bml_copy(tmp_bml, d_bml) else beta = r_norm_new/r_norm_old - call bml_add(d_bml, p2_bml, beta, 1.0_dp, threshold) + call bml_add(d_bml, tmp_bml, beta, 1.0_dp, threshold) endif call bml_multiply(A_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) alpha = r_norm_new/bml_trace_mult(d_bml, w_bml) call bml_add(p_bml, d_bml, 1.0_dp, alpha, threshold) - call bml_add(p2_bml, w_bml, 1.0_dp, -alpha, threshold) + call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha, threshold) r_norm_old = r_norm_new - r_norm_new = bml_sum_squares(p2_bml) + r_norm_new = bml_sum_squares(tmp_bml) if (k .gt. 50) then write(*,*) "Conjugate gradient is not converging" stop endif enddo - write(*,*) "Number of CG-iterations:", k + !write(*,*) "Number of CG-iterations:", k end subroutine prg_conjgrad @@ -872,8 +1151,10 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if !write(*,*) "mu = ", mu enddo @@ -884,7 +1165,7 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr call bml_multiply(p_bml, aux_bml, aux1_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("Diagonalization - Adjusted occupation",p_bml,0,10,0,10) call bml_deallocate(eigenvectors_bml) diff --git a/src/prg_syrotation_mod.F90 b/src/prg_syrotation_mod.F90 index 44d7f91e..16f28b7e 100644 --- a/src/prg_syrotation_mod.F90 +++ b/src/prg_syrotation_mod.F90 @@ -190,7 +190,7 @@ subroutine prg_rotate(rot,r,verbose) v2(2)=rot%v2(2) v2(3)=rot%v2(3) - vQ(1)=rot%vQ(1) !Rotation center + vQ(1)=rot%vQ(1) !Rotation center vQ(2)=rot%vQ(2) vQ(3)=rot%vQ(3) @@ -232,7 +232,7 @@ subroutine prg_rotate(rot,r,verbose) v2=pq2-vQ endif - vtr(1)=0.0_dp !Translation + vtr(1)=0.0_dp !Translation vtr(2)=0.0_dp vtr(3)=0.0_dp diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 new file mode 100644 index 00000000..3a25c8ce --- /dev/null +++ b/src/prg_xlbokernel_mod.F90 @@ -0,0 +1,867 @@ +!> Pre-conditioned O(N) calculation of the kernel for XL-BOMD. +!! \ingroup PROGRESS +!! \brief Here are subroutines implementing Niklasson's scheme for +!! low-rank, Krylov subspace approximation of the kernel. +module prg_xlbokernel_mod + + use omp_lib + use bml + use prg_normalize_mod + use prg_densitymatrix_mod + use prg_timer_mod + use prg_parallel_mod + use prg_ewald_mod + use prg_implicit_fermi_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: prg_kernel_multirank + public :: prg_kernel_multirank_latte + public :: prg_kernel_matrix_multirank + public :: prg_full_kernel + public :: prg_full_kernel_latte + +contains + + subroutine Invert(A,AI,N) + + implicit none + integer, parameter :: PREC = 8 + integer, intent(in) :: N + real(PREC), intent(in) :: A(N,N) + real(PREC), intent(out) :: AI(N,N) + real(PREC) :: WORK(N+N*N)!, C(N,N) + integer :: LDA, LWORK, M, INFO, IPIV(N) + integer :: I,J,K + + external DGETRF + external DGETRI + + AI = A + LDA = N + M = N + LWORK = N+N*N + + call DGETRF(M, N, AI, LDA, IPIV, INFO) + call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) + + end subroutine Invert + + !> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) + !! \param KRes The low rank approximation + !! \param KK0_bml The pre-conditioner K0. + !! \param Res The residual q[n]-n + !! \param FelTol Relative error tolerance for approximation + !! \param L Number of vectors used. + !! \param LMAX Maximum nr of vectors to use. + !! \param NUMRANK Nr of vectors to use. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param mu The chemical potiential. + !! \param beta Scaled inverse temperature. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy. + !! \param nebcoul Neighbour lists. + !! \param totnebcoul Number of neighbours list. + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Nocc Occupation. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu, Nocc + real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) + real(dp), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(20) :: bml_type + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It,N,MN + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp, start, finish + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml, X_bml, Y_bml + + call timer_prg_init() + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) + + call bml_transpose(Z_bml,ZT_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_set_row(Res_bml,1,Res,1.0_dp*1e-10) + call bml_transpose(KK0_bml,KK0T_bml) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + write(*,*) 'resnorm =', norm2(Res), 'kresnorm =', norm2(dr) + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish + I = I + 1 + !write(*,*) 'dr =', norm2(dr) + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + ! Compute H1 = H(v) + dq_v = v + call prg_timer_start(1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & + dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call prg_timer_stop(1,1) + + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1,Nr_atoms-1 + do K = Hinxlist(It)+1,Hinxlist(It+1) + row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + ! H1 = 1/2(S*H1+H1*S) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + ! H1 = Z^T H1 Z + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + ! D1 = Z D1 Z^T + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute dq/dv + call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,1.0_dp*1e-10) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) + call prg_timer_shutdown() + + end subroutine prg_kernel_multirank_latte + + ! Above routine but for development code + subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_transpose(KK0_bml,KK0T_bml) + call bml_set_row(Res_bml,1,Res,ONE*1e-14) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + + end subroutine prg_kernel_multirank + + !> Compute full inverse Jacobian of q[n]-n (for LATTE) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & + Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & + Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml, & + Nocc,m_rec,threshold,beta,Nr_elem,nebcoul,totnebcoul) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, threshold,beta,Nocc + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) + integer, intent(in) :: Hinxlist(HDIM) + real(PREC), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml + real(PREC), intent(inout) :: mu0 + integer, intent(in) :: Element_Pointer(Nr_atoms),nebcoul(4,Max_Nr_Neigh,Nr_atoms),totnebcoul(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms), err,tol,start,ewaldk,ewaldr,ewaldkacc,ewaldracc,response,respacc + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It,N,MN + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:),E(:,:),EReal(:,:),EKspace(:,:) + type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml, X_bml, Y_bml + character(20) :: bml_type + + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) + call bml_transpose(Z_bml,ZT_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + allocate(EReal(Nr_atoms,Nr_atoms)); allocate(EKspace(Nr_atoms,Nr_atoms)) + allocate(E(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + call Ewald_Real_Space_Matrix_latte(EReal,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,Coulomb_acc,Nebcoul,Totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + call Ewald_k_Space_Matrix_latte(EKspace,RXYZ,Box,Nr_atoms,Coulomb_acc,Max_Nr_Neigh,Nebcoul,Totnebcoul) + E = EReal + EKspace + write(*,*) 'Beginning response calculations' + ewaldracc = 0.0; ewaldkacc = 0.0; respacc = 0.0; + do J = 1,Nr_atoms + write(*,*) 'J =',J + call cpu_time(start) + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + !call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & + ! dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + !Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + call cpu_time(ewaldr) + ewaldracc = ewaldracc + ewaldr-start + !call Ewald_k_Space_latte_single(Coulomb_Pot_k,J,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc) + !Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call cpu_time(ewaldk) + ewaldkacc = ewaldkacc + ewaldk-ewaldr + Coulomb_Pot_dq_v = matmul(E,dq_v) + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms-1 + do K = Hinxlist(I)+1,Hinxlist(I+1) + row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,Nocc,threshold) + call cpu_time(response) + respacc = respacc + response-ewaldk + + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + JJ(It,J) = dq_dv(It) + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + + write(*,*) 'ewaldracc =', ewaldracc + write(*,*) 'ewaldkacc =', ewaldkacc + write(*,*) 'implcit response time =', respacc + call Invert(JJ,KK,Nr_atoms) + + deallocate(row1); deallocate(row2); deallocate(JJ) + deallocate(E); deallocate(EReal); deallocate(EKspace) + call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) + + end subroutine prg_full_kernel_latte + + !> Compute full inverse Jacobian of q[n]-n (for development code) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RX,RY,RZ Nuclear coordinates. + !! \param Lbox Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Type List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param TIMERATIO Parameter for Ewald + !! \param nnRx,nnRy,nnRz Neighbour lists. + !! \param nrnnlist Number of neighbours list. + !! \param nnType Refers to original order of atoms. + !! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param Znuc List of nuclear charges. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param diagonal Auxillary vector. + + subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & + nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Nocc,Znuc,m_rec,threshold,beta,diagonal) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + real(PREC), intent(inout) :: mu0, diagonal(HDIM) + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms) + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + call Ewald_Real_Space_Single(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,I,RX,RY,RZ,LBox, & + dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + ! TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + diagonal = 0.0_dp + do I = 1,HDIM + call bml_set_row(H1_bml,I,diagonal,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms + do K = H_INDEX_START(I),H_INDEX_END(I) + diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,diagonal,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + JJ(It,J) = dq_dv(It) + enddo + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + call Invert(JJ,KK,Nr_atoms) + deallocate(row1); deallocate(row2); deallocate(JJ) + + end subroutine prg_full_kernel + + + ! Compute the low-rank kernel matrix. (For development code) + subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu,Nocc + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: KK0_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(out) :: KRes(Nr_atoms) + integer :: I,J,K,It,col,row + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp,elem + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) + + allocate(row1(HDIM));allocate(row2(HDIM)); + + dr = Res + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, Nocc, threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + + ! Does not work, need to normalize matrix + if ((Fel > FelTol).AND.(I < (LMAX))) then + deallocate(O, M) + else + do row = 1,Nr_atoms + do col = 1,Nr_atoms + elem = 0.0 + do K = 1,L + do J = 1,L + elem = elem + M(J,K)*vi(row,J)*fi(col,K) + enddo + enddo + if (abs(elem) > threshold) then + call bml_set_element(KK0_bml,row,col,elem) + endif + enddo + enddo + deallocate(O,M) + endif + enddo + deallocate(row1); deallocate(row2); + + end subroutine prg_kernel_matrix_multirank + +end module prg_xlbokernel_mod diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cbc484e7..f2d8d5df 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -56,6 +56,7 @@ progress_test(prg_density_T) progress_test(prg_density_T_fermi) progress_test(prg_sp2_basic) progress_test(prg_implicit_fermi) +progress_test(prg_implicit_fermi_save_inverse) progress_test(prg_sp2_alg1_dense) progress_test(prg_sp2_alg2_dense) progress_test(prg_sp2_alg1_ellpack) diff --git a/tests/src/main.F90 b/tests/src/main.F90 index cd2fe3d2..9e437799 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -31,7 +31,8 @@ program main implicit none - integer :: norb, mdim, verbose, i + integer :: norb, mdim, verbose + type(bml_matrix_t) :: inv_bml(10) type(bml_matrix_t) :: ham_bml type(bml_matrix_t) :: rho_bml, rho1_bml type(bml_matrix_t) :: rho_ortho_bml @@ -59,7 +60,7 @@ program main real(dp) :: mineval, maxeval, occerrlimit real(dp), allocatable :: gbnd(:) integer :: minsp2iter, icount, nodesPerPart, occsteps - integer :: norecs + integer :: norecs,occiter,i integer :: maxsp2iter, npts, sp2all_timer, sp2all_timer_init integer, allocatable :: pp(:), signlist(:) real(dp), allocatable :: vv(:) @@ -189,6 +190,8 @@ program main mu = 0.2_dp beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + bml_type = 'ellpack' call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) @@ -196,7 +199,7 @@ program main call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_implicit_fermi(ham_bml, rho1_bml, 10, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 10e-8_dp) + call prg_implicit_fermi(ham_bml, rho1_bml, norecs, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 1e-6_dp) mu = 0.2_dp @@ -205,11 +208,44 @@ program main call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) error_calc = bml_fnorm(rho1_bml) + write(*,*) error_calc + if(error_calc.gt.0.1_dp)then write(*,*) "Error in Implicit Fermi expansion ","Error = ",error_calc error stop endif + case("prg_implicit_fermi_save_inverse") + + mu = 0.2_dp + beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + nocc = 10.0_dp + + do i = 1,norecs + call bml_identity_matrix(bml_type,bml_element_real,dp,norb,norb,inv_bml(i)) + enddo + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) + + mu = 0.2_dp + call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) + write(*,*) mu + call bml_scale(0.5_dp,rho_bml) + call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) + + error_calc = bml_fnorm(rho1_bml) + if(error_calc.gt.0.1_dp)then + write(*,*) "Error in Implicit Fermi expansion save inverse","Error = ",error_calc + error stop + endif + case("prg_sp2_basic") !Sp2 original version call prg_timer_start(loop_timer) @@ -979,11 +1015,13 @@ program main error stop endif + call prg_timer_stop(loop_timer) case("prg_buildzsparse") ! Building inverse overlap factor matrix (Lowdin method) write(*,*) "Testing buildzsparse from prg_genz_mod" + error_tol = 1.0d-2 bml_type = "ellpack" @@ -1005,7 +1043,6 @@ program main call prg_buildZsparse(over_bml,aux_bml,1,mdim,bml_type,zk1_bml,zk2_bml,zk3_bml& &,zk4_bml,zk5_bml,zk6_bml,4,4,3,threshold,threshold,.true.,1) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) error_calc = bml_fnorm(aux_bml) @@ -1095,6 +1132,8 @@ program main call bml_add_deprecated(-1.0_dp,rho_bml,1.0_dp,rho1_bml,0.0_dp) error_calc = bml_fnorm(rho_bml) write(*,*)error_calc + + if(error_calc.gt.error_tol)then write(*,*) "Error is too high", error_calc error stop @@ -1136,7 +1175,6 @@ program main endif - case default write(*,*)"ERROR: unknown test ",test