diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 92ce4a89d..a3124c05e 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -120,7 +120,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_HANDLE_KIND + USE OPENACC, ONLY: ACC_IS_PRESENT, ACC_HANDLE_KIND USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -176,6 +176,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins... INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT = 0 + LOGICAL :: LUPDATE_PGP, LUPDATE_PGPUV, LUPDATE_PGP2, LUPDATE_PGP3A, LUPDATE_PGP3B + TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(2*NPROC) @@ -340,57 +342,77 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDIF CALL GSTATS(412,0) ACC_POINTERS_CNT = 0 + LUPDATE_PGP = .FALSE. IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) +#ifdef ACCGPU + LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) +#endif ENDIF + LUPDATE_PGPUV = .FALSE. IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) +#ifdef ACCGPU + LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) +#endif ENDIF + LUPDATE_PGP2 = .FALSE. IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) +#ifdef ACCGPU + LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) +#endif ENDIF + LUPDATE_PGP3A = .FALSE. IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) +#ifdef ACCGPU + LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) +#endif ENDIF + LUPDATE_PGP3B = .FALSE. IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) +#ifdef ACCGPU + LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) +#endif ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) !$ACC WAIT(1) - IF (PRESENT(PGP)) THEN + IF (LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP) #endif ENDIF - IF (PRESENT(PGPUV)) THEN + IF (LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGPUV) #endif ENDIF - IF (PRESENT(PGP2)) THEN + IF (LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP2) #endif ENDIF - IF (PRESENT(PGP3A)) THEN + IF (LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B)) THEN + IF (LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 242f701bf..24d32767e 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -121,7 +121,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE ISO_C_BINDING, ONLY: C_SIZE_T USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_HANDLE_KIND + USE OPENACC, ONLY: ACC_HANDLE_KIND, ACC_IS_PRESENT IMPLICIT NONE @@ -190,6 +190,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(NPROC*2) + LOGICAL :: LUPDATE_PGP, LUPDATE_PGPUV, LUPDATE_PGP2, LUPDATE_PGP3A, LUPDATE_PGP3B #ifdef PARKINDTRANS_SINGLE #define TRLTOG_DTYPE MPI_FLOAT @@ -487,25 +488,45 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif ACC_POINTERS_CNT = 0 + LUPDATE_PGP = .FALSE. IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) +#ifdef ACCGPU + LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) +#endif ENDIF + LUPDATE_PGPUV = .FALSE. IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) +#ifdef ACCGPU + LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) +#endif ENDIF + LUPDATE_PGP2 = .FALSE. IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) +#ifdef ACCGPU + LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) +#endif ENDIF + LUPDATE_PGP3A = .FALSE. IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) +#ifdef ACCGPU + LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) +#endif ENDIF + LUPDATE_PGP3B = .FALSE. IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) +#ifdef ACCGPU + LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) +#endif ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) #ifdef OMPGPU @@ -857,35 +878,35 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END DATA ! PGPUV !$ACC END DATA ! PGP #endif - IF (PRESENT(PGP)) THEN + IF (LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP) #endif ENDIF - IF (PRESENT(PGPUV)) THEN + IF (LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGPUV) #endif ENDIF - IF (PRESENT(PGP2)) THEN + IF (LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP2) #endif ENDIF - IF (PRESENT(PGP3A)) THEN + IF (LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B)) THEN + IF (LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU