diff --git a/src/trans/cpu/internal/trgl_mod.F90 b/src/trans/cpu/internal/trgl_mod.F90 new file mode 100644 index 000000000..d56d4d477 --- /dev/null +++ b/src/trans/cpu/internal/trgl_mod.F90 @@ -0,0 +1,745 @@ +! (C) Copyright 2025- ECMWF. +! (C) Copyright 2025- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TRGL_MOD + +USE PARKIND1, ONLY : JPIM + +IMPLICIT NONE + +TYPE TRGL_BUFFERS + INTEGER(KIND=JPIM) :: ISENDCOUNT = -9999 + INTEGER(KIND=JPIM) :: IRECVCOUNT = -9999 + INTEGER(KIND=JPIM) :: INSEND = -9999 + INTEGER(KIND=JPIM) :: INRECV = -9999 + INTEGER(KIND=JPIM) :: IFLDS = 0 + LOGICAL :: LLTRGTOL = .FALSE. + LOGICAL :: LLPGPONLY = .FALSE. + LOGICAL :: LLINDER = .FALSE. + + INTEGER(KIND=JPIM), ALLOCATABLE :: ISENDTOT (:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IRECVTOT (:) + INTEGER(KIND=JPIM), ALLOCATABLE :: ISEND(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IRECV(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IINDEX(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: INDOFF(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTRSEND(:,:,:) + INTEGER(KIND=JPIM), ALLOCATABLE :: ISETWL(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: ISETVL(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: ISETW(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IJPOS(:,:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IPOSPLUS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IFLDA(:,:) +END TYPE TRGL_BUFFERS + +TYPE TRGL_VARS + INTEGER(KIND=JPIM), ALLOCATABLE :: IUVLEVS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IUVPARS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGP2PARS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IFLDOFF(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTROFF(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGP3APARS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGP3ALEVS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGP3BPARS(:) + INTEGER(KIND=JPIM), ALLOCATABLE :: IGP3BLEVS(:) + LOGICAL, ALLOCATABLE :: LLUV(:) + LOGICAL, ALLOCATABLE :: LLGP2(:) + LOGICAL, ALLOCATABLE :: LLGP3A(:) + LOGICAL, ALLOCATABLE :: LLGP3B(:) +END TYPE TRGL_VARS + +CONTAINS + +SUBROUTINE ALLOCATE_BUFFERS_CST(SELF) + USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC + USE TPM_TRANS ,ONLY : NGPBLKS + + CLASS(TRGL_BUFFERS), INTENT(INOUT) :: SELF + ALLOCATE (SELF%ISENDTOT (NPROC)) + ALLOCATE (SELF%IRECVTOT (NPROC)) + ALLOCATE (SELF%ISEND (NPROC)) + ALLOCATE (SELF%IRECV (NPROC)) + ALLOCATE (SELF%IINDEX(D%NLENGTF)) + ALLOCATE (SELF%INDOFF(NPROC)) + ALLOCATE (SELF%IGPTRSEND(2,NGPBLKS,NPRTRNS)) + ALLOCATE (SELF%ISETWL(NPROC)) + ALLOCATE (SELF%ISETVL(NPROC)) + +END SUBROUTINE ALLOCATE_BUFFERS_CST + + +SUBROUTINE ALLOCATE_BUFFERS_SR(SELF, KF_GP) + USE TPM_TRANS ,ONLY : NGPBLKS + + CLASS(TRGL_BUFFERS), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM),INTENT(IN) :: KF_GP + + IF (SELF%LLTRGTOL) THEN + ALLOCATE (SELF%ISETW(SELF%INSEND)) + ALLOCATE (SELF%IJPOS(NGPBLKS,SELF%INSEND)) + ALLOCATE (SELF%IPOSPLUS(SELF%INSEND)) + ALLOCATE (SELF%IFLDA(KF_GP,SELF%INSEND)) + ELSE + ALLOCATE (SELF%ISETW(SELF%INRECV)) + ALLOCATE (SELF%IJPOS(NGPBLKS,SELF%INRECV)) + ALLOCATE (SELF%IPOSPLUS(SELF%INRECV)) + ALLOCATE (SELF%IFLDA(KF_GP,SELF%INRECV)) + ENDIF +END SUBROUTINE ALLOCATE_BUFFERS_SR + +SUBROUTINE TRGL_ALLOCATE_VARS(SELF, KF_GP, KF_FS) + USE TPM_TRANS ,ONLY : NGPBLKS + + CLASS(TRGL_VARS), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP, KF_FS + + ALLOCATE(SELF%IUVLEVS(KF_GP)) + ALLOCATE(SELF%IUVPARS(KF_GP)) + ALLOCATE(SELF%IGP2PARS(KF_GP)) + ALLOCATE(SELF%IFLDOFF(KF_FS)) + ALLOCATE(SELF%IGPTROFF(NGPBLKS)) + ALLOCATE(SELF%LLUV(KF_GP)) + ALLOCATE(SELF%LLGP2(KF_GP)) + ALLOCATE(SELF%LLGP3A(KF_GP)) + ALLOCATE(SELF%LLGP3B(KF_GP)) + ALLOCATE(SELF%IGP3APARS(KF_GP)) + ALLOCATE(SELF%IGP3ALEVS(KF_GP)) + ALLOCATE(SELF%IGP3BPARS(KF_GP)) + ALLOCATE(SELF%IGP3BLEVS(KF_GP)) + +END SUBROUTINE TRGL_ALLOCATE_VARS + +SUBROUTINE TRGL_ALLOCATE_HEAP_BUFFER(Z_HEAP, S1, S2) + USE PARKIND1 ,ONLY : JPIM, JPRB + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + + IMPLICIT NONE + + REAL(KIND=JPRB), INTENT(INOUT), ALLOCATABLE :: Z_HEAP(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: S1, S2 + + IF (ALLOCATED(Z_HEAP) .AND. (S1 /= UBOUND(Z_HEAP,1) .OR. S2 /= SIZE(Z_HEAP,2) )) THEN + IF (LBOUND(Z_HEAP,1) /= -1) CALL ABORT_TRANS('TRGL_MOD: WRONG Z_HEAP SIZE IN TRGL_ALLOCATE_HEAP_BUFFER ') + DEALLOCATE(Z_HEAP) + ENDIF + + IF (.NOT. ALLOCATED(Z_HEAP)) THEN + ALLOCATE(Z_HEAP(-1:S1,S2)) + ENDIF +END SUBROUTINE TRGL_ALLOCATE_HEAP_BUFFER + + +SUBROUTINE TRGL_INIT_VARS(YLVARS, KF_SCALARS_G, PGP, PGPUV, PGP3A, PGP3B, PGP2) + USE PARKIND1 ,ONLY : JPIM, JPRB + USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP + IMPLICIT NONE + + TYPE(TRGL_VARS), INTENT(INOUT) :: YLVARS + INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G + REAL(KIND=JPRB),OPTIONAL :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP2(:,:,:) + + ! Local variables + INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF + INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2 + + INTEGER(KIND=JPIM) :: J + + ASSOCIATE(IUVLEVS=>YLVARS%IUVLEVS, IUVPARS=>YLVARS%IUVPARS, IGP2PARS=>YLVARS%IGP2PARS, & + & LLUV=>YLVARS%LLUV, LLGP2=>YLVARS%LLGP2, LLGP3A=>YLVARS%LLGP3A, LLGP3B=>YLVARS%LLGP3B, & + & IGP3APARS=>YLVARS%IGP3APARS, IGP3ALEVS=>YLVARS%IGP3ALEVS, IGP3BPARS=>YLVARS%IGP3BPARS, & + & IGP3BLEVS=>YLVARS%IGP3BLEVS) + + IUVPAR=0 + IUVLEV=0 + IOFF1=0 + IOFFNS=KF_SCALARS_G + IOFFEW=2*KF_SCALARS_G + + LLUV(:) = .FALSE. + IUVPARS(:) = -99 + IUVLEVS(:) = -99 + + IF (PRESENT(PGPUV)) THEN + IOFF=0 + IUVLEV=UBOUND(PGPUV,2) + IF(LVORGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + IF(LDIVGP) THEN + IUVPAR=IUVPAR+1 + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR + LLUV(IOFF+J)=.TRUE. + ENDDO + IOFF=IOFF+IUVLEV + ENDIF + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + ENDDO + IUVPAR=IUVPAR+2 + LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. + IOFF=IOFF+2*IUVLEV + IOFF1=IOFF + IOFFNS=IOFFNS+IOFF + IOFFEW=IOFFEW+IOFF + + IOFF=IUVPAR*IUVLEV+KF_SCALARS_G + IF(LUVDER) THEN + IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G + DO J=1,IUVLEV + IUVLEVS(IOFF+J)=J + IUVPARS(IOFF+J)=IUVPAR+1 + LLUV(IOFF+J)=.TRUE. + IUVLEVS(IOFF+J+IUVLEV)=J + IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 + LLUV(IOFF+J+IUVLEV)=.TRUE. + ENDDO + IUVPAR=IUVPAR+2 + IOFF=IOFF+2*IUVLEV + IOFFEW=IOFFEW+2*IUVLEV + ENDIF + ENDIF + + LLGP2(:)=.FALSE. + IF (PRESENT(PGP2)) THEN + IOFF=IOFF1 + IGP2PAR=UBOUND(PGP2,2) + IF(LSCDERS) IGP2PAR=IGP2PAR/3 + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J + ENDDO + IOFF1=IOFF1+IGP2PAR + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+IGP2PAR + ENDDO + IOFFNS=IOFF+IGP2PAR + IOFF=IOFFEW + DO J=1,IGP2PAR + LLGP2(J+IOFF) = .TRUE. + IGP2PARS(J+IOFF)=J+2*IGP2PAR + ENDDO + IOFFEW=IOFF+IGP2PAR + ENDIF + ENDIF + + LLGP3A(:) = .FALSE. + IF (PRESENT(PGP3A)) THEN + IGP3ALEV=UBOUND(PGP3A,2) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + IOFF=IOFF1 + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3APAR + IOFF1=IOFF1+IGP3APAR*IGP3ALEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3APAR + IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV + IOFF=IOFFEW + DO J1=1,IGP3APAR + DO J2=1,IGP3ALEV + LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. + IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF + IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV + ENDIF + ENDIF + + LLGP3B(:) = .FALSE. + IF (PRESENT(PGP3B)) THEN + IGP3BLEV=UBOUND(PGP3B,2) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + IOFF=IOFF1 + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IGP3BPAR + IOFF1=IOFF1+IGP3BPAR*IGP3BLEV + IF(LSCDERS) THEN + IOFF=IOFFNS + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IPAROFF=IPAROFF+IGP3BPAR + IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV + IOFF=IOFFEW + DO J1=1,IGP3BPAR + DO J2=1,IGP3BLEV + LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. + IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF + IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 + ENDDO + ENDDO + IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV + ENDIF + ENDIF + + END ASSOCIATE +END SUBROUTINE TRGL_INIT_VARS + +SUBROUTINE TRGL_INIT_OFF_VARS(YDBUFS,YLVARS,KVSET,KPTRGP,KF_GP) + USE TPM_DISTR ,ONLY : MYSETV, MYSETW + USE TPM_TRANS ,ONLY : NGPBLKS + + TYPE(TRGL_BUFFERS), INTENT(INOUT) :: YDBUFS + TYPE(TRGL_VARS), INTENT(INOUT) :: YLVARS + INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP + + !local variables + INTEGER(KIND=JPIM) :: JFLD, IFIRST, ILAST,IPOS, JBLK + + ASSOCIATE(KGPTRSEND=>YDBUFS%IGPTRSEND, IFLDS=>YDBUFS%IFLDS, IFLDOFF=>YLVARS%IFLDOFF, & + & IGPTROFF=>YLVARS%IGPTROFF, LLINDER=>YDBUFS%LLINDER) + + IFLDS = 0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN + IFLDS = IFLDS+1 + IF(LLINDER) THEN + IFLDOFF(IFLDS) = KPTRGP(JFLD) + ELSE + IFLDOFF(IFLDS) = JFLD + ENDIF + ENDIF + ENDDO + + IPOS=0 + DO JBLK=1,NGPBLKS + IGPTROFF(JBLK)=IPOS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + IPOS=IPOS+ILAST-IFIRST+1 + ENDIF + ENDDO + + END ASSOCIATE +END SUBROUTINE TRGL_INIT_OFF_VARS + +SUBROUTINE TGRL_INIT_PACKING_VARS(YDBUFS,YLVARS, KVSET, KF_GP, PCOMBUFS) + USE PARKIND1 ,ONLY : JPIM, JPRB + USE TPM_TRANS ,ONLY : NGPBLKS + TYPE(TRGL_BUFFERS), INTENT(INOUT) :: YDBUFS + TYPE(TRGL_VARS), INTENT(INOUT) :: YLVARS + + INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(:) + INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP + REAL(KIND=JPRB), POINTER,INTENT(IN), OPTIONAL :: PCOMBUFS(:,:) + !local variables + INTEGER(KIND=JPIM) :: IFLD, IPOS, JFLD, IFIRST, ILAST, JBLK + INTEGER(KIND=JPIM) :: KINRS, IV, ISETV, INRS + + ASSOCIATE(KGPTRSEND=>YDBUFS%IGPTRSEND, IPOSPLUS=>YDBUFS%IPOSPLUS, IJPOS=>YDBUFS%IJPOS, & + & IFLDA=>YDBUFS%IFLDA, ISETW=>YDBUFS%ISETW) + + IF (YDBUFS%LLTRGTOL) THEN + KINRS = YDBUFS%INSEND + ELSE + KINRS = YDBUFS%INRECV + ENDIF + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INRS, IV, ISETV, JBLK, IFIRST, ILAST, IFLD, IPOS, JFLD) + DO INRS=1,KINRS + IF (YDBUFS%LLTRGTOL) THEN + IV=YDBUFS%ISEND(INRS) + ELSE + IV=YDBUFS%IRECV(INRS) + ENDIF + YDBUFS%ISETW(INRS)=YDBUFS%ISETWL(IV) + ISETV=YDBUFS%ISETVL(IV) + + IFLD = 0 + IPOS = 0 + IPOSPLUS(INRS)=0 + DO JFLD=1,KF_GP + IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN + IFLD = IFLD+1 + IFLDA(IFLD,INRS)=JFLD + ENDIF + ENDDO + + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INRS)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INRS)) + IJPOS(JBLK,INRS)=IPOS + IPOSPLUS(INRS)=IPOSPLUS(INRS)+(ILAST-IFIRST+1) + IPOS=IPOS+(ILAST-IFIRST+1) + ENDIF + ENDDO + IF (PRESENT(PCOMBUFS)) THEN + PCOMBUFS(-1,INRS) = 1 + PCOMBUFS(0,INRS) = IFLD + ENDIF + ENDDO + !$OMP END PARALLEL DO + + END ASSOCIATE +END SUBROUTINE TGRL_INIT_PACKING_VARS + +SUBROUTINE TGRL_COPY_ZCOMBUF(YDBUFS, YLVARS, INRS, ZCOMBUF, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2) + + USE PARKIND1 ,ONLY : JPIM, JPRB + USE TPM_TRANS ,ONLY : NGPBLKS + + TYPE(TRGL_BUFFERS), INTENT(INOUT) :: YDBUFS + TYPE(TRGL_VARS), INTENT(INOUT) :: YLVARS + INTEGER(KIND=JPIM), INTENT(IN) :: INRS + REAL(KIND=JPRB), POINTER, CONTIGUOUS, INTENT(INOUT) :: ZCOMBUF(:,:) + INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) + REAL(KIND=JPRB),OPTIONAL :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP2(:,:,:) + + !Local variables + INTEGER(KIND=JPIM) :: I_FLD_START,I_FLD_END + INTEGER(KIND=JPIM) :: IFIRST, ILAST + INTEGER(KIND=JPIM) :: JJ,JI,JK,IFLDT, JBLK, IPOS + + ASSOCIATE(IUVLEVS=>YLVARS%IUVLEVS, IUVPARS=>YLVARS%IUVPARS, IGP2PARS=>YLVARS%IGP2PARS, & + & LLUV=>YLVARS%LLUV, LLGP2=>YLVARS%LLGP2, LLGP3A=>YLVARS%LLGP3A, LLGP3B=>YLVARS%LLGP3B, & + & IGP3APARS=>YLVARS%IGP3APARS, IGP3ALEVS=>YLVARS%IGP3ALEVS, IGP3BPARS=>YLVARS%IGP3BPARS, & + & IGP3BLEVS=>YLVARS%IGP3BLEVS, KGPTRSEND =>YDBUFS%IGPTRSEND, IFLDA=>YDBUFS%IFLDA, & + & IPOSPLUS=>YDBUFS%IPOSPLUS, JPOS=>YDBUFS%IJPOS, ISETW=>YDBUFS%ISETW, & + & LLPGPONLY=>YDBUFS%LLPGPONLY, LLINDER=>YDBUFS%LLINDER) + + IPOS=IPOSPLUS(INRS) + I_FLD_START = ZCOMBUF(-1,INRS) + I_FLD_END = ZCOMBUF(0,INRS) + + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,IFIRST,ILAST,JK,JJ,JI,JBLK) + DO JJ=I_FLD_START,I_FLD_END + IFLDT=IFLDA(JJ,INRS) + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,ISETW(INRS)) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,ISETW(INRS)) + IF(LLINDER) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGP(JK,KPTRGP(IFLDT),JBLK) + ELSE + PGP(JK,KPTRGP(IFLDT),JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ELSEIF(LLPGPONLY) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGP(JK,IFLDT,JBLK) + ELSE + PGP(JK,IFLDT,JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ELSEIF(LLUV(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) + ELSE + PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ELSEIF(LLGP2(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) + ELSE + PGP2(JK,IGP2PARS(IFLDT),JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ELSEIF(LLGP3A(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) + ELSE + PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ELSEIF(LLGP3B(IFLDT)) THEN + DO JK=IFIRST,ILAST + JI=(JJ-I_FLD_START)*IPOS+JPOS(JBLK,INRS)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + ZCOMBUF(JI,INRS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) + ELSE + PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = ZCOMBUF(JI,INRS) + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + ENDDO + !$OMP END PARALLEL DO + + END ASSOCIATE +END SUBROUTINE TGRL_COPY_ZCOMBUF + + +SUBROUTINE TGRL_COPY_PGLAT(PGLAT, YDBUFS, YLVARS, PGP, PGPUV, PGP3A, PGP3B, PGP2) + + USE PARKIND1 ,ONLY : JPIM, JPRB, JPIB + USE TPM_DISTR ,ONLY : MYSETW, MYPROC + USE TPM_GEN ,ONLY : NOUT + USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + USE TPM_TRANS ,ONLY : NGPBLKS + + REAL(KIND=JPRB),OPTIONAL :: PGLAT(:,:) + TYPE(TRGL_BUFFERS), INTENT(INOUT) :: YDBUFS + TYPE(TRGL_VARS), INTENT(INOUT) :: YLVARS + REAL(KIND=JPRB),OPTIONAL :: PGP(:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGPUV(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3A(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP3B(:,:,:,:) + REAL(KIND=JPRB),OPTIONAL :: PGP2(:,:,:) + + !Local variables + INTEGER(KIND=JPIM) :: IFIRST, ILAST, IFLD, IPOS, JBLK, JK + INTEGER(KIND=JPIB) :: JFLD64 + + ASSOCIATE(IUVLEVS=>YLVARS%IUVLEVS,IFLDOFF=>YLVARS%IFLDOFF, IGPTROFF=>YLVARS%IGPTROFF, & + & IUVPARS=>YLVARS%IUVPARS, IGP2PARS=>YLVARS%IGP2PARS, LLUV=>YLVARS%LLUV, & + & LLGP2=>YLVARS%LLGP2, LLGP3A=>YLVARS%LLGP3A, LLGP3B=>YLVARS%LLGP3B, & + & IGP3APARS=>YLVARS%IGP3APARS, IGP3ALEVS=>YLVARS%IGP3ALEVS, & + & IGP3BPARS=>YLVARS%IGP3BPARS, IGP3BLEVS=>YLVARS%IGP3BLEVS, KINDEX=>YDBUFS%IINDEX, & + & KNDOFF=>YDBUFS%INDOFF, KGPTRSEND =>YDBUFS%IGPTRSEND, IFLDS=>YDBUFS%IFLDS, & + & LLPGPONLY=>YDBUFS%LLPGPONLY) + +#ifdef __NEC__ + ! Loops inversion is still better on Aurora machines, according to CHMI. REK. + !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#else + !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) +#endif + DO JBLK=1,NGPBLKS + IFIRST = KGPTRSEND(1,JBLK,MYSETW) + IF(IFIRST > 0) THEN + ILAST = KGPTRSEND(2,JBLK,MYSETW) + ! Address PGLAT over 64 bits because its size may exceed 2 GB for big data and + ! small number of tasks. + IF(LLPGPONLY) THEN + DO JFLD64=1,IFLDS + IFLD = IFLDOFF(JFLD64) + !DIR$ VECTOR ALWAYS + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + PGLAT(JFLD64,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) + ELSE + PGP(JK,IFLD,JBLK) = PGLAT(JFLD64,KINDEX(IPOS)) + ENDIF + ENDDO + ENDDO + ELSE + DO JFLD64=1,IFLDS + IFLD = IFLDOFF(JFLD64) + IF(LLUV(IFLD)) THEN + !DIR$ VECTOR ALWAYS + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + PGLAT(JFLD64,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) + ELSE + PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD64,KINDEX(IPOS)) + ENDIF + ENDDO + ELSEIF(LLGP2(IFLD)) THEN + !DIR$ VECTOR ALWAYS + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + PGLAT(JFLD64,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) + ELSE + PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) + ENDIF + ENDDO + ELSEIF(LLGP3A(IFLD)) THEN + !DIR$ VECTOR ALWAYS + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + PGLAT(JFLD64,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) + ELSE + PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) + ENDIF + ENDDO + ELSEIF(LLGP3B(IFLD)) THEN + !DIR$ VECTOR ALWAYS + DO JK=IFIRST,ILAST + IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 + IF (YDBUFS%LLTRGTOL) THEN + PGLAT(JFLD64,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) + ELSE + PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) + ENDIF + ENDDO + ELSE + WRITE(NOUT,*)'TRGTOL_MOD: ERROR',JFLD64,IFLD + CALL ABORT_TRANS('TRGTOL_MOD: ERROR') + ENDIF + ENDDO + ENDIF + ENDIF + ENDDO + !$OMP END PARALLEL DO + + END ASSOCIATE +END SUBROUTINE TGRL_COPY_PGLAT + + +SUBROUTINE TRGL_PROLOG(KF_FS,KF_GP,KVSET,YDBUFS) + + USE PARKIND1 ,ONLY : JPIM + + USE TPM_DISTR ,ONLY : D, MYSETW, NPRTRNS, MYPROC, NPROC + + USE INIGPTR_MOD ,ONLY : INIGPTR + USE PE2SET_MOD ,ONLY : PE2SET + + IMPLICIT NONE + + INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP + TYPE (TRGL_BUFFERS), INTENT(INOUT) :: YDBUFS + INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) + INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) + INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, IPOS, ISETA, ISETB, ISETV + INTEGER(KIND=JPIM) :: JFLD, JGL, JL, ISETW, JROC, J + INTEGER(KIND=JPIM) :: INDOFFX + + ! ------------------------------------------------------------------ + !* 0. Some initializations + ! -------------------- + + CALL INIGPTR(YDBUFS%IGPTRSEND,IGPTRRECV) + + INDOFFX = 0 + YDBUFS%INRECV = 0 + YDBUFS%INSEND = 0 + + DO JROC=1,NPROC + + CALL PE2SET(JROC,ISETA,ISETB,YDBUFS%ISETWL(JROC),YDBUFS%ISETVL(JROC)) + + ISETW=YDBUFS%ISETWL(JROC) + ISETV=YDBUFS%ISETVL(JROC) + + ! Count up expected number of fields + IPOS = COUNT(KVSET == ISETV .OR. KVSET == -1) + + IF (YDBUFS%LLTRGTOL) THEN + YDBUFS%ISENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF( JROC /= MYPROC) THEN + IF(YDBUFS%ISENDTOT(JROC) > 0) THEN + YDBUFS%INSEND = YDBUFS%INSEND+1 + YDBUFS%ISEND(YDBUFS%INSEND)=JROC + ENDIF + ENDIF + ELSE + YDBUFS%IRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS + IF(YDBUFS%IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + YDBUFS%INRECV = YDBUFS%INRECV + 1 + YDBUFS%IRECV(YDBUFS%INRECV)=JROC + ENDIF + ENDIF + IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) + ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) + + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IPOS = IPOS+D%NONL(IGL,ISETB) + ENDDO + + IF (YDBUFS%LLTRGTOL) THEN + YDBUFS%IRECVTOT(JROC) = IPOS*KF_FS + IF(YDBUFS%IRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN + YDBUFS%INRECV = YDBUFS%INRECV + 1 + YDBUFS%IRECV(YDBUFS%INRECV)=JROC + ENDIF + ELSE + YDBUFS%ISENDTOT(JROC) = IPOS*KF_FS + IF( JROC /= MYPROC) THEN + IF(YDBUFS%ISENDTOT(JROC) > 0) THEN + YDBUFS%INSEND = YDBUFS%INSEND+1 + YDBUFS%ISEND(YDBUFS%INSEND)=JROC + ENDIF + ENDIF + ENDIF + + IF(IPOS > 0) THEN + YDBUFS%INDOFF(JROC) = INDOFFX + INDOFFX = INDOFFX+IPOS + IPOS = 0 + DO JGL=IFIRSTLAT,ILASTLAT + IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) + IGLL = JGL-D%NPTRLS(MYSETW)+1 + DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& + &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 + IPOS = IPOS+1 + YDBUFS%IINDEX(IPOS+YDBUFS%INDOFF(JROC)) = JL + ENDDO + ENDDO + ENDIF + ENDDO + + YDBUFS%ISENDCOUNT = MAXVAL(YDBUFS%ISENDTOT) + YDBUFS%IRECVCOUNT = MAXVAL(YDBUFS%IRECVTOT) + +END SUBROUTINE TRGL_PROLOG + +END MODULE TRGL_MOD diff --git a/src/trans/cpu/internal/trgtol_mod.F90 b/src/trans/cpu/internal/trgtol_mod.F90 index 61ceffd63..acc44418a 100644 --- a/src/trans/cpu/internal/trgtol_mod.F90 +++ b/src/trans/cpu/internal/trgtol_mod.F90 @@ -1,6 +1,6 @@ ! (C) Copyright 1995- ECMWF. ! (C) Copyright 1995- Meteo-France. -! +! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities @@ -10,13 +10,14 @@ MODULE TRGTOL_MOD +IMPLICIT NONE + PUBLIC TRGTOL -PRIVATE TRGTOL_PROLOG, TRGTOL_COMM, TRGTOL_COMM_HEAP, TRGTOL_COMM_STACK +PRIVATE TRGTOL_COMM CONTAINS -SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& - &PGP,PGPUV,PGP3A,PGP3B,PGP2) +SUBROUTINE TRGTOL(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2) !**** *TRGTOL * - head routine for transposition of grid point data from column ! structure to latitudinal. Reorganize data between @@ -56,16 +57,15 @@ SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& USE PARKIND1 ,ONLY : JPIM ,JPRB USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS +USE TPM_DISTR ,ONLY : D +USE TRGL_MOD, ONLY: TRGL_BUFFERS, ALLOCATE_BUFFERS_CST, TRGL_PROLOG, ALLOCATE_BUFFERS_SR IMPLICIT NONE -INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) -INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) @@ -73,17 +73,7 @@ SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) -INTEGER(KIND=JPIM) :: ISENDCOUNT -INTEGER(KIND=JPIM) :: IRECVCOUNT -INTEGER(KIND=JPIM) :: INSEND -INTEGER(KIND=JPIM) :: INRECV -INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) -INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) -INTEGER(KIND=JPIM) :: ISEND (NPROC) -INTEGER(KIND=JPIM) :: IRECV (NPROC) -INTEGER(KIND=JPIM) :: IINDEX(D%NLENGTF) -INTEGER(KIND=JPIM) :: INDOFF(NPROC) -INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) +TYPE (TRGL_BUFFERS) :: YDBUFS REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -91,17 +81,15 @@ SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) -CALL TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,& - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND) -IF (NSTACK_MEMORY_TR==0) THEN - CALL TRGTOL_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) -ELSE - CALL TRGTOL_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) -ENDIF +YDBUFS%LLTRGTOL = .TRUE. +CALL ALLOCATE_BUFFERS_CST(YDBUFS) +CALL GSTATS(1805, 0) +CALL TRGL_PROLOG(KF_FS, KF_GP, KVSET, YDBUFS) +CALL GSTATS(1805, 1) +CALL ALLOCATE_BUFFERS_SR(YDBUFS, KF_GP) + +CALL TRGTOL_COMM(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2, & + & YDBUFS) IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) @@ -109,275 +97,8 @@ SUBROUTINE TRGTOL(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& END SUBROUTINE TRGTOL -SUBROUTINE TRGTOL_PROLOG(KF_FS,KF_GP,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND) - -!**** *TRGTOL_PROLOG * - prolog for transposition of grid point data from column -! structure to latitudinal. Reorganize data between -! grid point calculations and direct Fourier Transform -! the purpose is essentially -! to compute the size of communication buffers in order to enable -! the use of automatic arrays later. - - -! Purpose. -! -------- - - -!** Interface. -! ---------- -! *call* *trgtol_prolog(...) - -! Explicit arguments : -! -------------------- - -! Implicit arguments : -! -------------------- - -! Method. -! ------- -! See documentation - -! Externals. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! R. El Khatib *Meteo-France* - -! Modifications. -! -------------- -! Original : 18-Aug-2014 from trgtol -! ------------------------------------------------------------------ - - - -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DISTR ,ONLY : D, NPRTRNS, MYSETW, MYPROC, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -USE INIGPTR_MOD ,ONLY : INIGPTR -USE PE2SET_MOD ,ONLY : PE2SET -! - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP -INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) - -INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(OUT) :: KNSEND -INTEGER(KIND=JPIM), INTENT(OUT) :: KNRECV -INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(OUT) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) - -INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) -INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, JROC, IPOS, ISETB, ISETA -INTEGER(KIND=JPIM) :: ISETV, J, JFLD, JGL, JL, ISETW, INDOFFX - -! ------------------------------------------------------------------ - -!* 0. Some initializations -! -------------------- - -CALL GSTATS(1805,0) - -CALL INIGPTR(KGPTRSEND,IGPTRRECV) - -INDOFFX = 0 -KNRECV = 0 -KNSEND = 0 - -DO JROC=1,NPROC - - CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) - - ! count up expected number of fields - IPOS = 0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 - ENDDO - KSENDTOT(JROC) = IGPTRRECV(ISETW)*IPOS - - IF( JROC /= MYPROC) THEN - IF(KSENDTOT(JROC) > 0) THEN - KNSEND = KNSEND+1 - KSEND(KNSEND)=JROC - ENDIF - ENDIF - - IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) - ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) - - IPOS = 0 - DO JGL=IFIRSTLAT,ILASTLAT - IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) - IPOS = IPOS+D%NONL(IGL,ISETB) - ENDDO - - KRECVTOT(JROC) = IPOS*KF_FS - IF(KRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN - KNRECV = KNRECV + 1 - KRECV(KNRECV)=JROC - ENDIF - - IF(IPOS > 0) THEN - KNDOFF(JROC) = INDOFFX - INDOFFX = INDOFFX+IPOS - IPOS = 0 - DO JGL=IFIRSTLAT,ILASTLAT - IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) - IGLL = JGL-D%NPTRLS(MYSETW)+1 - DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& - &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 - IPOS = IPOS+1 - KINDEX(IPOS+KNDOFF(JROC)) = JL - ENDDO - ENDDO - ENDIF - -ENDDO - -KSENDCOUNT=0 -KRECVCOUNT=0 -DO J=1,NPROC - KSENDCOUNT=MAX(KSENDCOUNT,KSENDTOT(J)) - KRECVCOUNT=MAX(KRECVCOUNT,KRECVTOT(J)) -ENDDO - -CALL GSTATS(1805,1) - -END SUBROUTINE TRGTOL_PROLOG - -SUBROUTINE TRGTOL_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) - -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP -REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) -INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) -INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) - -REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFS_HEAP(:,:) -REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFR_HEAP(:,:) -INTEGER(KIND=JPIM), SAVE :: INRECV_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: INSEND_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: IRECVCOUNT_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: ISENDCOUNT_PREV = -1 - -IF ( .NOT. ALLOCATED(ZCOMBUFS_HEAP) ) THEN - ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND)) - ISENDCOUNT_PREV = KSENDCOUNT - INSEND_PREV = KNSEND -ELSEIF ( KSENDCOUNT /= ISENDCOUNT_PREV .OR. KNSEND /= INSEND_PREV ) THEN - DEALLOCATE(ZCOMBUFS_HEAP) - ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND)) - ISENDCOUNT_PREV = KSENDCOUNT - INSEND_PREV = KNSEND -ENDIF - -! Now, force the OS to allocate this shared array right now, not when it starts to be used which is -! an OPEN-MP loop, that would cause a threads synchronization lock : -IF (KNSEND > 0 .AND. KSENDCOUNT >= -1) ZCOMBUFS_HEAP(-1,1) = HUGE(1._JPRB) - -IF ( .NOT. ALLOCATED(ZCOMBUFR_HEAP) ) THEN - ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV)) - IRECVCOUNT_PREV = KRECVCOUNT - INRECV_PREV = KNRECV -ELSEIF ( KRECVCOUNT /= IRECVCOUNT_PREV .OR. KNRECV /= INRECV_PREV ) THEN - DEALLOCATE(ZCOMBUFR_HEAP) - ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV)) - IRECVCOUNT_PREV = KRECVCOUNT - INRECV_PREV = KNRECV -ENDIF - -CALL TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & ZCOMBUFS_HEAP,ZCOMBUFR_HEAP, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) - -END SUBROUTINE TRGTOL_COMM_HEAP - -SUBROUTINE TRGTOL_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) - -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP -REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) -INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) -INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) - -REAL(KIND=JPRB) :: ZCOMBUFS_STACK(-1:KSENDCOUNT,KNSEND) -REAL(KIND=JPRB) :: ZCOMBUFR_STACK(-1:KRECVCOUNT,KNRECV) - -CALL TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & ZCOMBUFS_STACK,ZCOMBUFR_STACK, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) - -END SUBROUTINE TRGTOL_COMM_STACK - -SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & PCOMBUFS,PCOMBUFR, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2) +SUBROUTINE TRGTOL_COMM(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, & + & PGP2,YDBUFS) !**** *TRGTOL_COMM * - transposition of grid point data from column ! structure to latitudinal. Reorganize data between @@ -434,22 +155,19 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& ! R. El Khatib 09-Sep-2020 64 bits addressing for PGLAT ! ------------------------------------------------------------------ - - -USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPIB +USE PARKIND1 ,ONLY : JPIM ,JPRB USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_WAITANY, & - & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED + & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED -USE TPM_GEN ,ONLY : NOUT, NTRANS_SYNC_LEVEL -USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRNS, MTAGGL, & - & MYSETV, MYSETW, MYPROC, NPROC -USE TPM_TRANS ,ONLY : LDIVGP, LGPNORM, LSCDERS, LUVDER, LVORGP, NGPBLKS +USE TPM_GEN ,ONLY : NTRANS_SYNC_LEVEL, NSTACK_MEMORY_TR +USE TPM_DISTR ,ONLY : D, MTAGGL, NPRCIDS, MYPROC, NPROC +USE TPM_TRANS ,ONLY : LGPNORM -USE PE2SET_MOD ,ONLY : PE2SET -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -! +USE TRGL_MOD, ONLY: TRGL_BUFFERS, TRGL_VARS, TRGL_ALLOCATE_VARS, TRGL_ALLOCATE_HEAP_BUFFER, & + & TRGL_INIT_VARS, TRGL_INIT_OFF_VARS, TGRL_COPY_ZCOMBUF, TGRL_COPY_PGLAT, & + & TGRL_INIT_PACKING_VARS IMPLICIT NONE @@ -457,52 +175,34 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& REAL(KIND=JPRB),INTENT(OUT) :: PGLAT(KF_FS,D%NLENGTF) INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFS(-1:KSENDCOUNT,KNSEND) -REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFR(-1:KRECVCOUNT,KNRECV) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) - -INTEGER(KIND=JPIM) :: IPOSPLUS(KNSEND) -INTEGER(KIND=JPIM) :: ISETW(KNSEND) -INTEGER(KIND=JPIM) :: IJPOS(NGPBLKS,KNSEND) -INTEGER(KIND=JPIM) :: IFLDA(KF_GP,KNSEND) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP2(:,:,:) + +TYPE(TRGL_BUFFERS), INTENT(INOUT), TARGET :: YDBUFS +! LOCAL VARIABLES +TYPE(TRGL_VARS) :: YLVARS INTEGER(KIND=JPIM) :: IREQ_SEND(NPROC) INTEGER(KIND=JPIM) :: IREQ_RECV(NPROC) -! LOCAL LOGICAL SCALARS -LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY, LLINDER -LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) - ! LOCAL INTEGER SCALARS -INTEGER(KIND=JPIM) :: IFIRST, ILAST, ILEN, IPOS, ISETA, ISETB, IRECV, ISETV -INTEGER(KIND=JPIM) :: ISEND, ITAG, JBLK, JFLD, JK, JL, IFLD, II, IFLDS, INS, INR -INTEGER(KIND=JPIM) :: JJ,JI,IFLDT, J - -INTEGER(KIND=JPIB) :: JFLD64 - -INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) -INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) -INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF -INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2, JNR -INTEGER(KIND=JPIM) :: IFLDOFF(KF_FS) -INTEGER(KIND=JPIM) :: ISEND_FLD_START,ISEND_FLD_END +INTEGER(KIND=JPIM) :: IRECV +INTEGER(KIND=JPIM) :: ISEND, ITAG, JL, JFLD, INS, INR, JNR +INTEGER(KIND=JPIM) :: II,ILEN INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END -INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + +! LOCAL ARRAYS +REAL(KIND=JPRB), TARGET :: ZCOMBUFS_STACK(-1:YDBUFS%ISENDCOUNT,MERGE (YDBUFS%INSEND,0,NSTACK_MEMORY_TR/=0)) +REAL(KIND=JPRB), TARGET :: ZCOMBUFR_STACK(-1:YDBUFS%IRECVCOUNT,MERGE (YDBUFS%INRECV,0,NSTACK_MEMORY_TR/=0)) + +REAL(KIND=JPRB), ALLOCATABLE, TARGET, SAVE :: ZCOMBUFS_HEAP(:,:) +REAL(KIND=JPRB), ALLOCATABLE, TARGET, SAVE :: ZCOMBUFR_HEAP(:,:) + +REAL(KIND=JPRB), POINTER, CONTIGUOUS :: ZCOMBUFS(:,:) +REAL(KIND=JPRB), POINTER, CONTIGUOUS :: ZCOMBUFR(:,:) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR @@ -510,6 +210,23 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& !* 0. Some initializations ! -------------------- +ASSOCIATE(KNSEND=>YDBUFS%INSEND, KNRECV=>YDBUFS%INRECV, KSENDTOT=>YDBUFS%ISENDTOT, & + & KRECVTOT=>YDBUFS%IRECVTOT, KSEND=>YDBUFS%ISEND, KRECV=>YDBUFS%IRECV, & + & KINDEX=>YDBUFS%IINDEX, KNDOFF=>YDBUFS%INDOFF) + +IF (NSTACK_MEMORY_TR == 0) THEN + CALL TRGL_ALLOCATE_HEAP_BUFFER(ZCOMBUFS_HEAP, YDBUFS%ISENDCOUNT, YDBUFS%INSEND) + CALL TRGL_ALLOCATE_HEAP_BUFFER(ZCOMBUFR_HEAP, YDBUFS%IRECVCOUNT, YDBUFS%INRECV) + + ! Now, force the OS to allocate this shared array right now, not when it starts to be used which + ! is an OPEN-MP loop, that would cause a threads synchronization lock : + IF (YDBUFS%INSEND > 0 .AND. YDBUFS%ISENDCOUNT >=-1) ZCOMBUFS_HEAP(-1,1)=HUGE(1._JPRB) + ZCOMBUFS (-1:,1:) => ZCOMBUFS_HEAP + ZCOMBUFR (-1:,1:) => ZCOMBUFR_HEAP +ELSE + ZCOMBUFS (-1:,1:) => ZCOMBUFS_STACK + ZCOMBUFR (-1:,1:) => ZCOMBUFR_STACK +ENDIF ITAG = MTAGGL @@ -524,13 +241,12 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& ENDIF IF (NTRANS_SYNC_LEVEL <= 0) THEN - ! Receive loop......................................................... + !...Receive loop......................................................... DO INR=1,KNRECV IRECV=KRECV(INR) - CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), & - & KSOURCE=NPRCIDS(IRECV), & - & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_RECV(INR), & - & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING IRECV' ) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD, KREQUEST=IREQ_RECV(INR), KTAG=ITAG, & + & CDSTRING='TRGTOL_COMM: NON-BLOCKING IRECV' ) ENDDO ENDIF @@ -541,271 +257,21 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& ENDIF CALL GSTATS(1805,0) -LLINDER = .FALSE. -LLPGPUV = .FALSE. -LLPGP3A = .FALSE. -LLPGP3B = .FALSE. -LLPGP2 = .FALSE. -LLPGPONLY = .FALSE. -IF(PRESENT(KPTRGP)) LLINDER = .TRUE. -IF(PRESENT(PGP)) LLPGPONLY = .TRUE. -IF(PRESENT(PGPUV)) LLPGPUV = .TRUE. -IF(PRESENT(PGP3A)) LLPGP3A = .TRUE. -IF(PRESENT(PGP3B)) LLPGP3B = .TRUE. -IF(PRESENT(PGP2)) LLPGP2 = .TRUE. - -IUVPAR=0 -IUVLEV=0 -IOFF1=0 -IOFFNS=KF_SCALARS_G -IOFFEW=2*KF_SCALARS_G - -LLUV(:) = .FALSE. -IUVPARS(:) = -99 -IUVLEVS(:) = -99 -IF (LLPGPUV) THEN - IOFF=0 - IUVLEV=UBOUND(PGPUV,2) - IF(LVORGP) THEN - IUVPAR=IUVPAR+1 - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR - LLUV(IOFF+J)=.TRUE. - ENDDO - IOFF=IOFF+IUVLEV - ENDIF - IF(LDIVGP) THEN - IUVPAR=IUVPAR+1 - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR - LLUV(IOFF+J)=.TRUE. - ENDDO - IOFF=IOFF+IUVLEV - ENDIF - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR+1 - IUVLEVS(IOFF+J+IUVLEV)=J - IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 - ENDDO - IUVPAR=IUVPAR+2 - LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. - IOFF=IOFF+2*IUVLEV - IOFF1=IOFF - IOFFNS=IOFFNS+IOFF - IOFFEW=IOFFEW+IOFF - - IOFF=IUVPAR*IUVLEV+KF_SCALARS_G - IF(LUVDER) THEN - IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR+1 - LLUV(IOFF+J)=.TRUE. - IUVLEVS(IOFF+J+IUVLEV)=J - IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 - LLUV(IOFF+J+IUVLEV)=.TRUE. - ENDDO - IUVPAR=IUVPAR+2 - IOFF=IOFF+2*IUVLEV - IOFFEW=IOFFEW+2*IUVLEV - ENDIF -ENDIF - -LLGP2(:)=.FALSE. -IF(LLPGP2) THEN - IOFF=IOFF1 - IGP2PAR=UBOUND(PGP2,2) - IF(LSCDERS) IGP2PAR=IGP2PAR/3 - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J - ENDDO - IOFF1=IOFF1+IGP2PAR - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J+IGP2PAR - ENDDO - IOFFNS=IOFF+IGP2PAR - IOFF=IOFFEW - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J+2*IGP2PAR - ENDDO - IOFFEW=IOFF+IGP2PAR - ENDIF -ENDIF - -LLGP3A(:) = .FALSE. -IF(LLPGP3A) THEN - IGP3ALEV=UBOUND(PGP3A,2) - IGP3APAR=UBOUND(PGP3A,3) - IF(LSCDERS) IGP3APAR=IGP3APAR/3 - IOFF=IOFF1 - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IGP3APAR - IOFF1=IOFF1+IGP3APAR*IGP3ALEV - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IPAROFF+IGP3APAR - IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV - IOFF=IOFFEW - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV - ENDIF -ENDIF - -LLGP3B(:) = .FALSE. -IF(LLPGP3B) THEN - IGP3BLEV=UBOUND(PGP3B,2) - IGP3BPAR=UBOUND(PGP3B,3) - IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 - IOFF=IOFF1 - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IGP3BPAR - IOFF1=IOFF1+IGP3BPAR*IGP3BLEV - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IPAROFF+IGP3BPAR - IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV - IOFF=IOFFEW - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV - ENDIF -ENDIF - +YDBUFS%LLINDER = PRESENT(KPTRGP) +YDBUFS%LLPGPONLY = PRESENT(PGP) +CALL TRGL_ALLOCATE_VARS(YLVARS, KF_GP,KF_FS) +CALL TRGL_INIT_VARS(YLVARS, KF_SCALARS_G, PGP, PGPUV, PGP3A, PGP3B, PGP2) CALL GSTATS(1805,1) - ! Copy local contribution IF(KSENDTOT(MYPROC) > 0 )THEN - IFLDS = 0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN - IFLDS = IFLDS+1 - IF(LLINDER) THEN - IFLDOFF(IFLDS) = KPTRGP(JFLD) - ELSE - IFLDOFF(IFLDS) = JFLD - ENDIF - ENDIF - ENDDO - - IPOS=0 - DO JBLK=1,NGPBLKS - IGPTROFF(JBLK)=IPOS - IFIRST = KGPTRSEND(1,JBLK,MYSETW) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,MYSETW) - IPOS=IPOS+ILAST-IFIRST+1 - ENDIF - ENDDO + CALL TRGL_INIT_OFF_VARS(YDBUFS,YLVARS,KVSET,KPTRGP,KF_GP) CALL GSTATS(1601,0) -#ifdef __NEC__ - ! Loops inversion is still better on Aurora machines, according to CHMI. REK. - !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) -#else - !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) -#endif - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,MYSETW) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,MYSETW) - ! Address PGLAT over 64 bits because its size may exceed 2 GB for big data and - ! small number of tasks. - IF(LLPGPONLY) THEN - DO JFLD64=1,IFLDS - IFLD = IFLDOFF(JFLD64) - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGLAT(JFLD64,KINDEX(IPOS)) = PGP(JK,IFLD,JBLK) - ENDDO - ENDDO - ELSE - DO JFLD64=1,IFLDS - IFLD = IFLDOFF(JFLD64) - IF(LLUV(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGLAT(JFLD64,KINDEX(IPOS)) = PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) - ENDDO - ELSEIF(LLGP2(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGLAT(JFLD64,KINDEX(IPOS)) = PGP2(JK,IGP2PARS(IFLD),JBLK) - ENDDO - ELSEIF(LLGP3A(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGLAT(JFLD64,KINDEX(IPOS)) = PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK) - ENDDO - ELSEIF(LLGP3B(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGLAT(JFLD64,KINDEX(IPOS)) = PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK) - ENDDO - ELSE - WRITE(NOUT,*)'TRGTOL_MOD: ERROR',JFLD64,IFLD - CALL ABORT_TRANS('TRGTOL_MOD: ERROR') - ENDIF - ENDDO - ENDIF - ENDIF - ENDDO - !$OMP END PARALLEL DO + CALL TGRL_COPY_PGLAT(PGLAT, YDBUFS, YLVARS, PGP, PGPUV, PGP3A, PGP3B, PGP2) CALL GSTATS(1601,1) - ENDIF - ! Now overlapping buffer packing/unpacking with sends/waits ! Time as if all communications to avoid double accounting @@ -817,103 +283,25 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& !....Pack+send loop......................................................... -!$OMP PARALLEL PRIVATE(JBLK,IFIRST,ILAST,ISEND_FLD_START,ISEND_FLD_END,INS,ISEND,ISETA,ISETB,& -!$OMP& ISETV,IFLD,IFLDT,IPOS,JFLD,JK,JJ,JI) -!$OMP DO SCHEDULE(STATIC) -DO INS=1,KNSEND - ISEND=KSEND(INS) - CALL PE2SET(ISEND,ISETA,ISETB,ISETW(INS),ISETV) - IFLD = 0 - IPOS = 0 - IPOSPLUS(INS)=0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN - IFLD = IFLD+1 - IFLDA(IFLD,INS)=JFLD - ENDIF - ENDDO +CALL TGRL_INIT_PACKING_VARS(YDBUFS,YLVARS, KVSET, KF_GP, ZCOMBUFS) - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,ISETW(INS)) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,ISETW(INS)) - IJPOS(JBLK,INS)=IPOS - IPOSPLUS(INS)=IPOSPLUS(INS)+(ILAST-IFIRST+1) - IPOS=IPOS+(ILAST-IFIRST+1) - ENDIF - ENDDO +DO INS=1,KNSEND - PCOMBUFS(-1,INS) = 1 - PCOMBUFS(0,INS) = IFLD + CALL TGRL_COPY_ZCOMBUF(YDBUFS, YLVARS, INS, ZCOMBUFS, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2) ENDDO -!$OMP END DO -DO INS=1,KNSEND - ISEND=KSEND(INS) - IPOS=IPOSPLUS(INS) - - ISEND_FLD_START=PCOMBUFS(-1,INS) - ISEND_FLD_END = PCOMBUFS(0,INS) - - !$OMP DO SCHEDULE(STATIC) - DO JJ=ISEND_FLD_START,ISEND_FLD_END - IFLDT=IFLDA(JJ,INS) - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,ISETW(INS)) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,ISETW(INS)) - IF(LLINDER) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGP(JK,KPTRGP(IFLDT),JBLK) - ENDDO - ELSE - IF(LLPGPONLY) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGP(JK,IFLDT,JBLK) - ENDDO - ELSEIF(LLUV(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) - ENDDO - ELSEIF(LLGP2(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGP2(JK,IGP2PARS(IFLDT),JBLK) - ENDDO - ELSEIF(LLGP3A(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) - ENDDO - ELSEIF(LLGP3B(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-ISEND_FLD_START)*IPOS+IJPOS(JBLK,INS)+JK-IFIRST+1 - PCOMBUFS(JI,INS) = PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) - ENDDO - ENDIF - ENDIF - ENDIF - ENDDO - ENDDO -!$OMP END DO -ENDDO -!$OMP END PARALLEL DO INS=1,KNSEND ISEND=KSEND(INS) IF (NTRANS_SYNC_LEVEL <= 1) THEN - CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & - & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_SEND(INS), & - & KTAG=ITAG,CDSTRING='TRGTOL_COMM: NON-BLOCKING ISEND' ) + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD, KREQUEST=IREQ_SEND(INS), KTAG=ITAG, & + & CDSTRING='TRGTOL_COMM: NON-BLOCKING ISEND') ELSE - CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND), & - & KMP_TYPE=JP_BLOCKING_BUFFERED, & - & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING BUFFERED BSEND' ) + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS), KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_BLOCKING_BUFFERED, KTAG=ITAG, & + & CDSTRING='TRGTOL_COMM: BLOCKING BUFFERED BSEND') ENDIF - ENDDO ! Unpack loop......................................................... @@ -921,30 +309,27 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& DO JNR=1,KNRECV IF (NTRANS_SYNC_LEVEL <= 0) THEN - CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV),KINDEX=INR,& - & CDSTRING='TRGTOL_COMM: WAIT FOR ANY RECEIVES') + CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV), KINDEX=INR, & + & CDSTRING='TRGTOL_COMM: WAIT FOR ANY RECEIVES') ELSE INR = JNR IRECV=KRECV(INR) - CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), & - & KSOURCE=NPRCIDS(IRECV), & - & KMP_TYPE=JP_BLOCKING_STANDARD, & - & KTAG=ITAG,CDSTRING='TRGTOL_COMM: BLOCKING RECV' ) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_BLOCKING_STANDARD, KTAG=ITAG, CDSTRING='TRGTOL_COMM: BLOCKING RECV' ) ENDIF IRECV=KRECV(INR) ILEN = KRECVTOT(IRECV)/KF_FS - IRECV_FLD_START = PCOMBUFR(-1,INR) - IRECV_FLD_END = PCOMBUFR(0,INR) + IRECV_FLD_START = ZCOMBUFR(-1,INR) + IRECV_FLD_END = ZCOMBUFR(0,INR) !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JL,II,JFLD) DO JL=1,ILEN II = KINDEX(KNDOFF(IRECV)+JL) DO JFLD=IRECV_FLD_START,IRECV_FLD_END - PGLAT(JFLD,II) = PCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) + PGLAT(JFLD,II) = ZCOMBUFR(JL+(JFLD-IRECV_FLD_START)*ILEN,INR) ENDDO ENDDO !$OMP END PARALLEL DO - IPOS = ILEN*(IRECV_FLD_END-IRECV_FLD_START+1) ENDDO IF (NTRANS_SYNC_LEVEL <= 1) THEN @@ -965,6 +350,8 @@ SUBROUTINE TRGTOL_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& CALL GSTATS_BARRIER2(761) +END ASSOCIATE + END SUBROUTINE TRGTOL_COMM END MODULE TRGTOL_MOD diff --git a/src/trans/cpu/internal/trltog_mod.F90 b/src/trans/cpu/internal/trltog_mod.F90 index 83c33bcdf..bf0c43f37 100644 --- a/src/trans/cpu/internal/trltog_mod.F90 +++ b/src/trans/cpu/internal/trltog_mod.F90 @@ -1,6 +1,6 @@ ! (C) Copyright 1995- ECMWF. ! (C) Copyright 1995- Meteo-France. -! +! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. ! In applying this licence, ECMWF does not waive the privileges and immunities @@ -10,13 +10,14 @@ MODULE TRLTOG_MOD +IMPLICIT NONE + PUBLIC TRLTOG -PRIVATE TRLTOG_PROLOG, TRLTOG_COMM, TRLTOG_COMM_HEAP, TRLTOG_COMM_STACK +PRIVATE TRLTOG_COMM CONTAINS -SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& - &PGP,PGPUV,PGP3A,PGP3B,PGP2) +SUBROUTINE TRLTOG(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2) !**** *TRLTOG * - head routine for transposition of grid point data from latitudinal ! to column structure (this takes place between inverse @@ -60,15 +61,14 @@ SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& USE PARKIND1 ,ONLY : JPIM ,JPRB USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS +USE TPM_DISTR ,ONLY : D +USE TRGL_MOD, ONLY: TRGL_BUFFERS, ALLOCATE_BUFFERS_CST, TRGL_PROLOG, ALLOCATE_BUFFERS_SR IMPLICIT NONE +REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS_G -REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) @@ -77,18 +77,7 @@ SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) -INTEGER(KIND=JPIM) :: ISENDCOUNT -INTEGER(KIND=JPIM) :: IRECVCOUNT -INTEGER(KIND=JPIM) :: INSEND -INTEGER(KIND=JPIM) :: INRECV -INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) -INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) -INTEGER(KIND=JPIM) :: ISEND (NPROC) -INTEGER(KIND=JPIM) :: IRECV (NPROC) -INTEGER(KIND=JPIM) :: IINDEX(D%NLENGTF) -INTEGER(KIND=JPIM) :: INDOFF(NPROC) -INTEGER(KIND=JPIM) :: IGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM) :: ISETWL(NPROC), ISETVL(NPROC) +TYPE(TRGL_BUFFERS) :: YDBUFS REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -96,20 +85,15 @@ SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& IF (LHOOK) CALL DR_HOOK('TRLTOG',0,ZHOOK_HANDLE) -CALL TRLTOG_PROLOG(KF_FS,KF_GP,KVSET,& - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & - & ISETWL,ISETVL) -IF (NSTACK_MEMORY_TR==0) THEN - CALL TRLTOG_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2,& - & ISETWL,ISETVL) -ELSE - CALL TRLTOG_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET, & - & ISENDCOUNT,IRECVCOUNT,INSEND,INRECV,ISENDTOT,IRECVTOT,ISEND,IRECV,IINDEX,INDOFF,IGPTRSEND, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2,& - & ISETWL,ISETVL) -ENDIF +YDBUFS%LLTRGTOL = .FALSE. +CALL ALLOCATE_BUFFERS_CST(YDBUFS) +CALL GSTATS(1806, 0) +CALL TRGL_PROLOG(KF_FS, KF_GP, KVSET, YDBUFS) +CALL GSTATS(1806, 1) +CALL ALLOCATE_BUFFERS_SR(YDBUFS, KF_GP) + +CALL TRLTOG_COMM(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2, & + & YDBUFS) IF (LHOOK) CALL DR_HOOK('TRLTOG',1,ZHOOK_HANDLE) @@ -117,291 +101,9 @@ SUBROUTINE TRLTOG(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,KPTRGP,& END SUBROUTINE TRLTOG -SUBROUTINE TRLTOG_PROLOG(KF_FS,KF_GP,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND, & - & KSETWL,KSETVL) - -!**** *TRLTOG_PROLOG * - prolog for transposition of grid point data from latitudinal -! to column structure (this takes place between inverse -! FFT and grid point calculations) : the purpose is essentially -! to compute the size of communication buffers in order to enable -! the use of automatic arrays later. -! TRLTOG_PROLOG is the inverse of TRGTOL_PROLOG - -! Purpose. -! -------- - -!** Interface. -! ---------- -! *call* *TRLTOG_PROLOG(...) - -! Explicit arguments : -! -------------------- -! KVSET - "v-set" for each field (input) - -! Implicit arguments : -! -------------------- - -! Method. -! ------- -! See documentation - -! Externals. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! R. El Khatib *Meteo-France* - -! Modifications. -! -------------- -! Original : 18-Aug-2014 from trltog -! ------------------------------------------------------------------ - -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DISTR ,ONLY : D, MYSETW, NPRTRNS, MYPROC, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -USE INIGPTR_MOD ,ONLY : INIGPTR -USE PE2SET_MOD ,ONLY : PE2SET -! - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP -INTEGER(KIND=JPIM),INTENT(IN) :: KVSET(KF_GP) - -INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(OUT) :: KNSEND -INTEGER(KIND=JPIM), INTENT(OUT) :: KNRECV -INTEGER(KIND=JPIM), INTENT(OUT) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(OUT) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM), INTENT(OUT) :: KSETWL(NPROC) -INTEGER(KIND=JPIM), INTENT(OUT) :: KSETVL(NPROC) - -INTEGER(KIND=JPIM) :: IGPTRRECV(NPRTRNS) -INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILASTLAT, IPOS, ISETA, ISETB, ISETV -INTEGER(KIND=JPIM) :: ISEND, JFLD, JGL, JL, ISETW, JROC, J -INTEGER(KIND=JPIM) :: INDOFFX,IBUFLENS,IBUFLENR - -! ------------------------------------------------------------------ - -!* 0. Some initializations -! -------------------- - -CALL GSTATS(1806,0) - -CALL INIGPTR(KGPTRSEND,IGPTRRECV) - -INDOFFX = 0 -IBUFLENS = 0 -IBUFLENR = 0 -KNRECV = 0 -KNSEND = 0 - -DO JROC=1,NPROC - - CALL PE2SET(JROC,ISETA,ISETB,KSETWL(JROC),KSETVL(JROC)) - ISEND = JROC - ISETW=KSETWL(JROC) - ISETV=KSETVL(JROC) - ! count up expected number of fields - IPOS = 0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1) IPOS = IPOS+1 - ENDDO - KRECVTOT(JROC) = IGPTRRECV(ISETW)*IPOS - IF(KRECVTOT(JROC) > 0 .AND. MYPROC /= JROC) THEN - KNRECV = KNRECV + 1 - KRECV(KNRECV)=JROC - ENDIF - - IF( JROC /= MYPROC) IBUFLENR = MAX(IBUFLENR,KRECVTOT(JROC)) - - IFIRSTLAT = MAX(D%NPTRLS(MYSETW),D%NFRSTLAT(ISETA)) - ILASTLAT = MIN(D%NPTRLS(MYSETW)+D%NULTPP(MYSETW)-1,D%NLSTLAT(ISETA)) - - IPOS = 0 - DO JGL=IFIRSTLAT,ILASTLAT - IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) - IPOS = IPOS+D%NONL(IGL,ISETB) - ENDDO - - KSENDTOT(JROC) = IPOS*KF_FS - IF( JROC /= MYPROC) THEN - IBUFLENS = MAX(IBUFLENS,KSENDTOT(JROC)) - IF(KSENDTOT(JROC) > 0) THEN - KNSEND = KNSEND+1 - KSEND(KNSEND)=JROC - ENDIF - ENDIF - - IF(IPOS > 0) THEN - KNDOFF(JROC) = INDOFFX - INDOFFX = INDOFFX+IPOS - IPOS = 0 - DO JGL=IFIRSTLAT,ILASTLAT - IGL = D%NPTRFRSTLAT(ISETA)+JGL-D%NFRSTLAT(ISETA) - IGLL = JGL-D%NPTRLS(MYSETW)+1 - DO JL=D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL),& - &D%NSTA(IGL,ISETB)+D%NSTAGTF(IGLL)+D%NONL(IGL,ISETB)-1 - IPOS = IPOS+1 - KINDEX(IPOS+KNDOFF(JROC)) = JL - ENDDO - ENDDO - ENDIF -ENDDO - -KSENDCOUNT=0 -KRECVCOUNT=0 -DO J=1,NPROC - KSENDCOUNT=MAX(KSENDCOUNT,KSENDTOT(J)) - KRECVCOUNT=MAX(KRECVCOUNT,KRECVTOT(J)) -ENDDO - -CALL GSTATS(1806,1) - -END SUBROUTINE TRLTOG_PROLOG - -SUBROUTINE TRLTOG_COMM_HEAP(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & - & KSETWL,KSETVL) - -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -IMPLICIT NONE - -INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS,KF_GP -REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) -INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETWL(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETVL(NPROC) - -REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFS_HEAP(:,:) -REAL(KIND=JPRB), ALLOCATABLE, SAVE :: ZCOMBUFR_HEAP(:,:) -INTEGER(KIND=JPIM), SAVE :: INRECV_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: INSEND_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: IRECVCOUNT_PREV = -1 -INTEGER(KIND=JPIM), SAVE :: ISENDCOUNT_PREV = -1 - -IF ( .NOT. ALLOCATED(ZCOMBUFS_HEAP) ) THEN - ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND)) - ISENDCOUNT_PREV = KSENDCOUNT - INSEND_PREV = KNSEND -ELSEIF ( KSENDCOUNT /= ISENDCOUNT_PREV .OR. KNSEND /= INSEND_PREV ) THEN - DEALLOCATE(ZCOMBUFS_HEAP) - ALLOCATE(ZCOMBUFS_HEAP(-1:KSENDCOUNT,KNSEND)) - ISENDCOUNT_PREV = KSENDCOUNT - INSEND_PREV = KNSEND -ENDIF - -! Now, force the OS to allocate this shared array right now, not when it starts to be used which is -! an OPEN-MP loop, that would cause a threads synchronization lock : -IF (KNSEND > 0 .AND. KSENDCOUNT >= -1) ZCOMBUFS_HEAP(-1,1) = HUGE(1._JPRB) - -IF ( .NOT. ALLOCATED(ZCOMBUFR_HEAP) ) THEN - ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV)) - IRECVCOUNT_PREV = KRECVCOUNT - INRECV_PREV = KNRECV -ELSEIF ( KRECVCOUNT /= IRECVCOUNT_PREV .OR. KNRECV /= INRECV_PREV ) THEN - DEALLOCATE(ZCOMBUFR_HEAP) - ALLOCATE(ZCOMBUFR_HEAP(-1:KRECVCOUNT,KNRECV)) - IRECVCOUNT_PREV = KRECVCOUNT - INRECV_PREV = KNRECV -ENDIF - -CALL TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & ZCOMBUFS_HEAP,ZCOMBUFR_HEAP, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & - & KSETWL,KSETVL) - -END SUBROUTINE TRLTOG_COMM_HEAP +SUBROUTINE TRLTOG_COMM(PGLAT, KF_FS, KF_GP, KF_SCALARS_G, KVSET, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, & + & PGP2,YDBUFS) -SUBROUTINE TRLTOG_COMM_STACK(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & - & KSETWL,KSETVL) - -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPROC -USE TPM_TRANS ,ONLY : NGPBLKS - -IMPLICIT NONE - -INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS,KF_GP -REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) -INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETWL(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETVL(NPROC) - -REAL(KIND=JPRB) :: ZCOMBUFS_STACK(-1:KSENDCOUNT,KNSEND) -REAL(KIND=JPRB) :: ZCOMBUFR_STACK(-1:KRECVCOUNT,KNRECV) - -CALL TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & ZCOMBUFS_STACK,ZCOMBUFR_STACK, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & - & KSETWL,KSETVL) - -END SUBROUTINE TRLTOG_COMM_STACK - -SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& - & KSENDCOUNT,KRECVCOUNT,KNSEND,KNRECV,KSENDTOT,KRECVTOT,KSEND,KRECV,KINDEX,KNDOFF,KGPTRSEND,& - & PCOMBUFS,PCOMBUFR, & - & KPTRGP,PGP,PGPUV,PGP3A,PGP3B,PGP2, & - & KSETWL,KSETVL) - !**** *trltog * - transposition of grid point data from latitudinal ! to column structure. This takes place between inverse @@ -458,19 +160,18 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& ! R. El Khatib 09-Sep-2020 64 bits addressing for PGLAT ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB ,JPIB +USE PARKIND1 ,ONLY : JPIM ,JPRB USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, JP_NON_BLOCKING_STANDARD, MPL_WAITANY, & - & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED + & JP_BLOCKING_STANDARD, MPL_BARRIER, JP_BLOCKING_BUFFERED -USE TPM_GEN ,ONLY : NOUT, NTRANS_SYNC_LEVEL -USE TPM_DISTR ,ONLY : D, MYSETV, MYSETW, MTAGLG, & - & NPRCIDS, NPRTRNS, MYPROC, NPROC -USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NGPBLKS +USE TPM_GEN ,ONLY : NTRANS_SYNC_LEVEL, NSTACK_MEMORY_TR +USE TPM_DISTR ,ONLY : D, MTAGLG, NPRCIDS, MYPROC, NPROC -USE PE2SET_MOD ,ONLY : PE2SET -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TRGL_MOD, ONLY: TRGL_BUFFERS, TRGL_VARS, TRGL_ALLOCATE_VARS, TRGL_ALLOCATE_HEAP_BUFFER, & + & TRGL_INIT_VARS, TRGL_INIT_OFF_VARS, TGRL_COPY_ZCOMBUF, TGRL_COPY_PGLAT, & + & TGRL_INIT_PACKING_VARS IMPLICIT NONE @@ -479,54 +180,35 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& REAL(KIND=JPRB),INTENT(IN) :: PGLAT(KF_FS,D%NLENGTF) INTEGER(KIND=JPIM), INTENT(IN) :: KVSET(KF_GP) INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVCOUNT -INTEGER(KIND=JPIM), INTENT(IN) :: KNSEND -INTEGER(KIND=JPIM), INTENT(IN) :: KNRECV -INTEGER(KIND=JPIM), INTENT(IN) :: KSENDTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECVTOT (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSEND (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KRECV (NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KINDEX(D%NLENGTF) -INTEGER(KIND=JPIM), INTENT(IN) :: KNDOFF(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KGPTRSEND(2,NGPBLKS,NPRTRNS) -REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFS(-1:KSENDCOUNT,KNSEND) -REAL(KIND=JPRB), INTENT(INOUT) :: PCOMBUFR(-1:KRECVCOUNT,KNRECV) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) -REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETWL(NPROC) -INTEGER(KIND=JPIM), INTENT(IN) :: KSETVL(NPROC) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +TYPE (TRGL_BUFFERS), INTENT(INOUT), TARGET :: YDBUFS ! LOCAL VARIABLES +TYPE(TRGL_VARS) :: YLVARS -INTEGER(KIND=JPIM) :: IPOSPLUS(KNRECV) -INTEGER(KIND=JPIM) :: ISETW(KNRECV) -INTEGER(KIND=JPIM) :: JPOS(NGPBLKS,KNRECV) -INTEGER(KIND=JPIM) :: IFLDA(KF_GP,KNRECV) INTEGER(KIND=JPIM) :: IREQ_SEND(NPROC) INTEGER(KIND=JPIM) :: IREQ_RECV(NPROC) -INTEGER(KIND=JPIM) :: IFIRST, IFLD, ILAST, IPOS, IRECV, ISETV -INTEGER(KIND=JPIM) :: ISEND, ITAG, JBLK, JFLD, JK, JL, IFLDS, INR, INS -INTEGER(KIND=JPIM) :: II,ILEN, IFLDT, JI, JJ, J - -INTEGER(KIND=JPIB) :: JFLD64 - -LOGICAL :: LLPGPUV,LLPGP3A,LLPGP3B,LLPGP2,LLPGPONLY -LOGICAL :: LLUV(KF_GP),LLGP2(KF_GP),LLGP3A(KF_GP),LLGP3B(KF_GP) -LOGICAL :: LLINDER -INTEGER(KIND=JPIM) :: IUVLEVS(KF_GP),IUVPARS(KF_GP),IGP2PARS(KF_GP) -INTEGER(KIND=JPIM) :: IGP3APARS(KF_GP),IGP3ALEVS(KF_GP),IGP3BPARS(KF_GP),IGP3BLEVS(KF_GP) -INTEGER(KIND=JPIM) :: IUVPAR,IUVLEV,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF -INTEGER(KIND=JPIM) :: IOFF,IOFF1,IOFFNS,IOFFEW,J1,J2, JNR -INTEGER(KIND=JPIM) :: IFLDOFF(KF_FS) -INTEGER(KIND=JPIM) :: IRECV_FLD_START,IRECV_FLD_END +! LOCAL INTEGER SCALARS +INTEGER(KIND=JPIM) :: IRECV +INTEGER(KIND=JPIM) :: ISEND, ITAG, JL, JFLD, INS, INR, JNR +INTEGER(KIND=JPIM) :: II,ILEN INTEGER(KIND=JPIM) :: ISEND_FLD_START,ISEND_FLD_END -INTEGER(KIND=JPIM) :: IGPTROFF(NGPBLKS) + +! LOCAL ARRAYS +REAL(KIND=JPRB), TARGET :: ZCOMBUFS_STACK(-1:YDBUFS%ISENDCOUNT,MERGE (YDBUFS%INSEND,0,NSTACK_MEMORY_TR/=0)) +REAL(KIND=JPRB), TARGET :: ZCOMBUFR_STACK(-1:YDBUFS%IRECVCOUNT,MERGE (YDBUFS%INRECV,0,NSTACK_MEMORY_TR/=0)) + +REAL(KIND=JPRB), ALLOCATABLE, TARGET, SAVE :: ZCOMBUFS_HEAP(:,:) +REAL(KIND=JPRB), ALLOCATABLE, TARGET, SAVE :: ZCOMBUFR_HEAP(:,:) + +REAL(KIND=JPRB), POINTER, CONTIGUOUS :: ZCOMBUFS(:,:) +REAL(KIND=JPRB), POINTER, CONTIGUOUS :: ZCOMBUFR(:,:) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE_BAR @@ -534,8 +216,25 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& !* 0. Some initializations ! -------------------- +ASSOCIATE(KNSEND=>YDBUFS%INSEND, KNRECV=>YDBUFS%INRECV, KSENDTOT=>YDBUFS%ISENDTOT, & + & KRECVTOT=>YDBUFS%IRECVTOT, KSEND=>YDBUFS%ISEND, KRECV=>YDBUFS%IRECV, & + & KINDEX=>YDBUFS%IINDEX, KNDOFF=>YDBUFS%INDOFF) + +IF (NSTACK_MEMORY_TR == 0) THEN + CALL TRGL_ALLOCATE_HEAP_BUFFER(ZCOMBUFS_HEAP, YDBUFS%ISENDCOUNT, YDBUFS%INSEND) + CALL TRGL_ALLOCATE_HEAP_BUFFER(ZCOMBUFR_HEAP, YDBUFS%IRECVCOUNT, YDBUFS%INRECV) + +! Now, force the OS to allocate this shared array right now, not when it starts to be used which is +! an OPEN-MP loop, that would cause a threads synchronization lock : + IF (YDBUFS%INSEND > 0 .AND. YDBUFS%ISENDCOUNT >=-1) ZCOMBUFS_HEAP(-1,1)=HUGE(1._JPRB) + ZCOMBUFS (-1:,1:) => ZCOMBUFS_HEAP + ZCOMBUFR (-1:,1:) => ZCOMBUFR_HEAP +ELSE + ZCOMBUFS (-1:,1:) => ZCOMBUFS_STACK + ZCOMBUFR (-1:,1:) => ZCOMBUFR_STACK +ENDIF -ITAG = MTAGLG +ITAG = MTAGLG IF (LHOOK) CALL DR_HOOK('TRLTOG_BAR',0,ZHOOK_HANDLE_BAR) CALL GSTATS_BARRIER(762) @@ -547,278 +246,29 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& !...Receive loop......................................................... DO INR=1,KNRECV IRECV=KRECV(INR) - CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), & - & KSOURCE=NPRCIDS(IRECV), & - & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_RECV(INR), & - & KTAG=ITAG,CDSTRING='TRLTOG_COMM: NON-BLOCKING IRECV' ) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD, KREQUEST=IREQ_RECV(INR), KTAG=ITAG, & + & CDSTRING='TRLTOG_COMM: NON-BLOCKING IRECV' ) ENDDO ENDIF CALL GSTATS(805,1) CALL GSTATS(1806,0) -LLINDER = .FALSE. -LLPGPUV = .FALSE. -LLPGP3A = .FALSE. -LLPGP3B = .FALSE. -LLPGP2 = .FALSE. -LLPGPONLY = .FALSE. -IF(PRESENT(KPTRGP)) LLINDER = .TRUE. -IF(PRESENT(PGP)) LLPGPONLY=.TRUE. -IF(PRESENT(PGPUV)) LLPGPUV=.TRUE. -IF(PRESENT(PGP3A)) LLPGP3A=.TRUE. -IF(PRESENT(PGP3B)) LLPGP3B=.TRUE. -IF(PRESENT(PGP2)) LLPGP2=.TRUE. - -IUVPAR=0 -IUVLEV=0 -IOFF1=0 -IOFFNS=KF_SCALARS_G -IOFFEW=2*KF_SCALARS_G - -LLUV(:) = .FALSE. -IF (LLPGPUV) THEN - IOFF=0 - IUVLEV=UBOUND(PGPUV,2) - IF(LVORGP) THEN - IUVPAR=IUVPAR+1 - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR - LLUV(IOFF+J)=.TRUE. - ENDDO - IOFF=IOFF+IUVLEV - ENDIF - IF(LDIVGP) THEN - IUVPAR=IUVPAR+1 - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR - LLUV(IOFF+J)=.TRUE. - ENDDO - IOFF=IOFF+IUVLEV - ENDIF - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR+1 - IUVLEVS(IOFF+J+IUVLEV)=J - IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 - ENDDO - IUVPAR=IUVPAR+2 - LLUV(IOFF+1:IOFF+2*IUVLEV)=.TRUE. - IOFF=IOFF+2*IUVLEV - IOFF1=IOFF - IOFFNS=IOFFNS+IOFF - IOFFEW=IOFFEW+IOFF - - IOFF=IUVPAR*IUVLEV+KF_SCALARS_G - IF(LUVDER) THEN - IF(LSCDERS) IOFF=IOFF+KF_SCALARS_G - DO J=1,IUVLEV - IUVLEVS(IOFF+J)=J - IUVPARS(IOFF+J)=IUVPAR+1 - LLUV(IOFF+J)=.TRUE. - IUVLEVS(IOFF+J+IUVLEV)=J - IUVPARS(IOFF+J+IUVLEV)=IUVPAR+2 - LLUV(IOFF+J+IUVLEV)=.TRUE. - ENDDO - IUVPAR=IUVPAR+2 - IOFF=IOFF+2*IUVLEV - IOFFEW=IOFFEW+2*IUVLEV - ENDIF -ENDIF - -LLGP2(:)=.FALSE. -IF(LLPGP2) THEN - IOFF=IOFF1 - IGP2PAR=UBOUND(PGP2,2) - IF(LSCDERS) IGP2PAR=IGP2PAR/3 - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J - ENDDO - IOFF1=IOFF1+IGP2PAR - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J+IGP2PAR - ENDDO - IOFFNS=IOFF+IGP2PAR - IOFF=IOFFEW - DO J=1,IGP2PAR - LLGP2(J+IOFF) = .TRUE. - IGP2PARS(J+IOFF)=J+2*IGP2PAR - ENDDO - IOFFEW=IOFF+IGP2PAR - ENDIF -ENDIF - -LLGP3A(:) = .FALSE. -IF(LLPGP3A) THEN - IGP3ALEV=UBOUND(PGP3A,2) - IGP3APAR=UBOUND(PGP3A,3) - IF(LSCDERS) IGP3APAR=IGP3APAR/3 - IOFF=IOFF1 - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1 - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IGP3APAR - IOFF1=IOFF1+IGP3APAR*IGP3ALEV - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IPAROFF+IGP3APAR - IOFFNS=IOFFNS+IGP3APAR*IGP3ALEV - IOFF=IOFFEW - DO J1=1,IGP3APAR - DO J2=1,IGP3ALEV - LLGP3A(J2+(J1-1)*IGP3ALEV+IOFF) = .TRUE. - IGP3APARS(J2+(J1-1)*IGP3ALEV+IOFF)=J1+IPAROFF - IGP3ALEVS(J2+(J1-1)*IGP3ALEV+IOFF)=J2 - ENDDO - ENDDO - IOFFEW=IOFFEW+IGP3APAR*IGP3ALEV - ENDIF -ENDIF - -LLGP3B(:) = .FALSE. -IF(LLPGP3B) THEN - IGP3BLEV=UBOUND(PGP3B,2) - IGP3BPAR=UBOUND(PGP3B,3) - IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 - IOFF=IOFF1 - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1 - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IGP3BPAR - IOFF1=IOFF1+IGP3BPAR*IGP3BLEV - IF(LSCDERS) THEN - IOFF=IOFFNS - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IPAROFF=IPAROFF+IGP3BPAR - IOFFNS=IOFFNS+IGP3BPAR*IGP3BLEV - IOFF=IOFFEW - DO J1=1,IGP3BPAR - DO J2=1,IGP3BLEV - LLGP3B(J2+(J1-1)*IGP3BLEV+IOFF) = .TRUE. - IGP3BPARS(J2+(J1-1)*IGP3BLEV+IOFF)=J1+IPAROFF - IGP3BLEVS(J2+(J1-1)*IGP3BLEV+IOFF)=J2 - ENDDO - ENDDO - IOFFEW=IOFFEW+IGP3BPAR*IGP3BLEV - ENDIF -ENDIF - +YDBUFS%LLINDER = PRESENT(KPTRGP) +YDBUFS%LLPGPONLY = PRESENT(PGP) +CALL TRGL_ALLOCATE_VARS(YLVARS, KF_GP,KF_FS) +CALL TRGL_INIT_VARS(YLVARS, KF_SCALARS_G, PGP, PGPUV, PGP3A, PGP3B, PGP2) CALL GSTATS(1806,1) - ! Copy local contribution -IF( KRECVTOT(MYPROC) > 0 )THEN - IFLDS = 0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == MYSETV .OR. KVSET(JFLD) == -1) THEN - IFLDS = IFLDS+1 - IF(LLINDER) THEN - IFLDOFF(IFLDS) = KPTRGP(JFLD) - ELSE - IFLDOFF(IFLDS) = JFLD - ENDIF - ENDIF - ENDDO - - IPOS=0 - DO JBLK=1,NGPBLKS - IGPTROFF(JBLK)=IPOS - IFIRST = KGPTRSEND(1,JBLK,MYSETW) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,MYSETW) - IPOS=IPOS+ILAST-IFIRST+1 - ENDIF - ENDDO +IF(KSENDTOT(MYPROC) > 0 )THEN + CALL TRGL_INIT_OFF_VARS(YDBUFS,YLVARS,KVSET,KPTRGP,KF_GP) CALL GSTATS(1604,0) -#ifdef __NEC__ - ! Loops inversion is still better on Aurora machines, according to CHMI. REK. - !$OMP PARALLEL DO SCHEDULE(DYNAMIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) -#else - !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD64,JBLK,JK,IFLD,IPOS,IFIRST,ILAST) -#endif - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,MYSETW) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,MYSETW) - ! Address PGLAT over 64 bits because its size may exceed 2 GB for big data and - ! small number of tasks. - IF(LLPGPONLY) THEN - DO JFLD64=1,IFLDS - IFLD = IFLDOFF(JFLD64) - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGP(JK,IFLD,JBLK) = PGLAT(JFLD64,KINDEX(IPOS)) - ENDDO - ENDDO - ELSE - DO JFLD64=1,IFLDS - IFLD = IFLDOFF(JFLD64) - IF(LLUV(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGPUV(JK,IUVLEVS(IFLD),IUVPARS(IFLD),JBLK) = PGLAT(JFLD64,KINDEX(IPOS)) - ENDDO - ELSEIF(LLGP2(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGP2(JK,IGP2PARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) - ENDDO - ELSEIF(LLGP3A(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGP3A(JK,IGP3ALEVS(IFLD),IGP3APARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) - ENDDO - ELSEIF(LLGP3B(IFLD)) THEN - !DIR$ VECTOR ALWAYS - DO JK=IFIRST,ILAST - IPOS = KNDOFF(MYPROC)+IGPTROFF(JBLK)+JK-IFIRST+1 - PGP3B(JK,IGP3BLEVS(IFLD),IGP3BPARS(IFLD),JBLK)=PGLAT(JFLD64,KINDEX(IPOS)) - ENDDO - ELSE - WRITE(NOUT,*)'TRLTOG_MOD: ERROR',JFLD64,IFLD - CALL ABORT_TRANS('TRLTOG_MOD: ERROR') - ENDIF - ENDDO - ENDIF - ENDIF - ENDDO - !$OMP END PARALLEL DO + CALL TGRL_COPY_PGLAT(PGLAT, YDBUFS, YLVARS, PGP, PGPUV,PGP3A, PGP3B,PGP2) CALL GSTATS(1604,1) - ENDIF - ! ! loop over the number of processors we need to communicate with. ! NOT MYPROC @@ -839,112 +289,40 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& DO JL=1,ILEN II = KINDEX(KNDOFF(ISEND)+JL) DO JFLD=ISEND_FLD_START,ISEND_FLD_END - PCOMBUFS((JFLD-ISEND_FLD_START)*ILEN+JL,INS) = PGLAT(JFLD,II) + ZCOMBUFS((JFLD-ISEND_FLD_START)*ILEN+JL,INS) = PGLAT(JFLD,II) ENDDO ENDDO !$OMP END PARALLEL DO - PCOMBUFS(-1,INS) = 1 - PCOMBUFS(0,INS) = KF_FS + ZCOMBUFS(-1,INS) = 1 + ZCOMBUFS(0,INS) = KF_FS IF (NTRANS_SYNC_LEVEL <= 1) THEN - CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& - & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ_SEND(INS), & - & KTAG=ITAG,CDSTRING='TRLTOG_COMM: NON-BLOCKING ISEND') + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS), KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD, KREQUEST=IREQ_SEND(INS), KTAG=ITAG, & + & CDSTRING='TRLTOG_COMM: NON-BLOCKING ISEND') ELSE - CALL MPL_SEND(PCOMBUFS(-1:KSENDTOT(ISEND),INS),KDEST=NPRCIDS(ISEND),& - & KMP_TYPE=JP_BLOCKING_BUFFERED, & - & KTAG=ITAG,CDSTRING='TRLTOG_COMM: BLOCKING BUFFERED BSEND') + CALL MPL_SEND(ZCOMBUFS(-1:KSENDTOT(ISEND),INS), KDEST=NPRCIDS(ISEND), & + & KMP_TYPE=JP_BLOCKING_BUFFERED, KTAG=ITAG, & + & CDSTRING='TRLTOG_COMM: BLOCKING BUFFERED BSEND') ENDIF ENDDO ! Unpack loop......................................................... -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INR,IRECV,ISETV,IFLD,JFLD,IPOS,JBLK,IFIRST,ILAST) -DO INR=1,KNRECV - IRECV=KRECV(INR) - - ISETW(INR)=KSETWL(IRECV) - ISETV=KSETVL(IRECV) - IFLD = 0 - DO JFLD=1,KF_GP - IF(KVSET(JFLD) == ISETV .OR. KVSET(JFLD) == -1 ) THEN - IFLD = IFLD+1 - IFLDA(IFLD,INR)=JFLD - ENDIF - ENDDO - IPOS = 0 - IPOSPLUS(INR)=0 - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,ISETW(INR)) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,ISETW(INR)) - JPOS(JBLK,INR)=IPOS - IPOSPLUS(INR)=IPOSPLUS(INR)+(ILAST-IFIRST+1) - IPOS=IPOS+(ILAST-IFIRST+1) - ENDIF - ENDDO -ENDDO -!$OMP END PARALLEL DO +CALL TGRL_INIT_PACKING_VARS(YDBUFS,YLVARS, KVSET, KF_GP) DO JNR=1,KNRECV - + IF (NTRANS_SYNC_LEVEL <= 0) THEN - CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV),KINDEX=INR,& - & CDSTRING='TRLTOG_COMM: WAIT FOR ANY RECEIVES') + CALL MPL_WAITANY(KREQUEST=IREQ_RECV(1:KNRECV), KINDEX=INR, & + & CDSTRING='TRLTOG_COMM: WAIT FOR ANY RECEIVES') ELSE INR = JNR IRECV=KRECV(INR) - CALL MPL_RECV(PCOMBUFR(-1:KRECVTOT(IRECV),INR), & - & KSOURCE=NPRCIDS(IRECV), & - & KMP_TYPE=JP_BLOCKING_STANDARD, & - & KTAG=ITAG,CDSTRING='TRLTOG_COMM: BLOCKING RECV' ) + CALL MPL_RECV(ZCOMBUFR(-1:KRECVTOT(IRECV),INR), KSOURCE=NPRCIDS(IRECV), & + & KMP_TYPE=JP_BLOCKING_STANDARD, KTAG=ITAG, CDSTRING='TRLTOG_COMM: BLOCKING RECV') ENDIF - IPOS=IPOSPLUS(INR) - IRECV_FLD_START = PCOMBUFR(-1,INR) - IRECV_FLD_END = PCOMBUFR(0,INR) - - !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(IFLDT,IFIRST,ILAST,JK,JJ,JI,JBLK) - DO JJ=IRECV_FLD_START,IRECV_FLD_END - IFLDT=IFLDA(JJ,INR) - DO JBLK=1,NGPBLKS - IFIRST = KGPTRSEND(1,JBLK,ISETW(INR)) - IF(IFIRST > 0) THEN - ILAST = KGPTRSEND(2,JBLK,ISETW(INR)) - IF(LLINDER) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGP(JK,KPTRGP(IFLDT),JBLK) = PCOMBUFR(JI,INR) - ENDDO - ELSEIF(LLPGPONLY) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGP(JK,IFLDT,JBLK) = PCOMBUFR(JI,INR) - ENDDO - ELSEIF(LLUV(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGPUV(JK,IUVLEVS(IFLDT),IUVPARS(IFLDT),JBLK) = PCOMBUFR(JI,INR) - ENDDO - ELSEIF(LLGP2(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGP2(JK,IGP2PARS(IFLDT),JBLK) = PCOMBUFR(JI,INR) - ENDDO - ELSEIF(LLGP3A(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGP3A(JK,IGP3ALEVS(IFLDT),IGP3APARS(IFLDT),JBLK) = PCOMBUFR(JI,INR) - ENDDO - ELSEIF(LLGP3B(IFLDT)) THEN - DO JK=IFIRST,ILAST - JI=(JJ-IRECV_FLD_START)*IPOS+JPOS(JBLK,INR)+JK-IFIRST+1 - PGP3B(JK,IGP3BLEVS(IFLDT),IGP3BPARS(IFLDT),JBLK) = PCOMBUFR(JI,INR) - ENDDO - ENDIF - ENDIF - ENDDO - ENDDO - !$OMP END PARALLEL DO + CALL TGRL_COPY_ZCOMBUF(YDBUFS, YLVARS, INR, ZCOMBUFR, KPTRGP, PGP, PGPUV, PGP3A, PGP3B, PGP2) ENDDO IF (NTRANS_SYNC_LEVEL <= 1) THEN @@ -961,5 +339,7 @@ SUBROUTINE TRLTOG_COMM(PGLAT,KF_FS,KF_GP,KF_SCALARS_G,KVSET,& CALL GSTATS_BARRIER2(762) +END ASSOCIATE + END SUBROUTINE TRLTOG_COMM END MODULE TRLTOG_MOD diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index 772bfbdc7..72056b2fd 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -126,6 +126,7 @@ s/trans_release( *($|\(| |\*|\.h))/trans_release_VARIANTDESIGNATOR\1/g s/TRANS_RELEASE/TRANS_RELEASE_VARIANTDESIGNATOR/g s/TRGTOL_MOD/TRGTOL_MOD_VARIANTDESIGNATOR/g s/TRLTOG_MOD/TRLTOG_MOD_VARIANTDESIGNATOR/g +s/TRGL_MOD/TRGL_MOD_VARIANTDESIGNATOR/g s/TRLTOM_MOD/TRLTOM_MOD_VARIANTDESIGNATOR/g s/TRLTOMAD_MOD/TRLTOMAD_MOD_VARIANTDESIGNATOR/g s/TRMTOL_MOD/TRMTOL_MOD_VARIANTDESIGNATOR/g