Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ ecbuild_find_package( NAME fiat REQUIRED )

# Inherit MPI feature from FIAT (if you don't want MPI, rebuild FIAT with ENABLE_MPI=OFF)
set( HAVE_MPI ${fiat_HAVE_MPI} )
set( ectrans_HAVE_MPI ${fiat_HAVE_MPI} )

ecbuild_add_option( FEATURE OMP
DEFAULT ON
Expand Down
2 changes: 1 addition & 1 deletion src/programs/ectrans.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ info()
echo " flags : @EC_Fortran_FLAGS@"
echo ""
echo "Features:"
echo " MPI : @ectrans_HAVE_MPI@"
echo " MPI : @HAVE_MPI@"
echo " OMP : @ectrans_HAVE_OMP@"
echo " MKL : @ectrans_HAVE_MKL@"
echo " FFTW : @ectrans_HAVE_FFTW@"
Expand Down
9 changes: 7 additions & 2 deletions src/trans/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ else()
ecbuild_info("warn: HIP and CUDA not found")
endif()

if( HAVE_GPU_AWARE_MPI AND NOT fiat_HAVE_MPL_F08 )
set( USE_RAW_MPI 1 )
else()
set( USE_RAW_MPI 0)
endif()

set( GPU_LIBRARY_TYPE SHARED )
if( HAVE_GPU_STATIC )
Expand Down Expand Up @@ -148,14 +153,14 @@ foreach( prec dp sp )
PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES}
$<${HAVE_ACC}:OpenACC::OpenACC_Fortran>
$<${HAVE_OMP}:OpenMP::OpenMP_Fortran>
$<${HAVE_MPI}:MPI::MPI_Fortran>
$<${HAVE_GPU_AWARE_MPI}:MPI::MPI_Fortran>
PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU
$<${HAVE_CUTLASS}:USE_CUTLASS>
$<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32>
$<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM>
$<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT>
$<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI>
ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI}
$<${USE_RAW_MPI}:USE_RAW_MPI>
)

ectrans_target_fortran_module_directory(
Expand Down
28 changes: 17 additions & 11 deletions src/trans/gpu/internal/trgtol_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,15 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,

USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPIB
USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK
USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT
USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT, MPL_RECV, MPL_SEND
USE TPM_GEN, ONLY: LSYNC_TRANS, LMPOFF
USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS
USE TPM_DISTR, ONLY: D, MYSETV, MYSETW, MTAGLG, NPRCIDS, MYPROC, NPROC, NPRTRW, &
& NPRTRV
USE PE2SET_MOD, ONLY: PE2SET
USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML
USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML, JP_NON_BLOCKING_STANDARD
USE OML_MOD, ONLY: OML_MY_THREAD
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_REAL4, MPI_REAL8
! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157)
#endif
Expand Down Expand Up @@ -186,20 +186,20 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins...
INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT

#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
TYPE(MPI_COMM) :: LOCAL_COMM
TYPE(MPI_REQUEST) :: IREQUEST(2*NPROC)
#else
INTEGER(JPIM) :: IREQUEST(2*NPROC)
#endif



#ifdef PARKINDTRANS_SINGLE
#define TRGTOL_DTYPE MPI_REAL4
#else
#define TRGTOL_DTYPE MPI_REAL8
#endif

#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
IF(.NOT. LMPOFF) THEN
LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() )
ENDIF
Expand Down Expand Up @@ -673,25 +673,31 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO INR=1,IRECV_COUNTS
IR=IR+1
IPROC=IRECV_TO_PROC(INR)
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)),IRECVTOT_MPI(IPROC), &
& TRGTOL_DTYPE,NPRCIDS(IPROC)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR)
IREQ(IR) = IREQUEST(IR)%MPI_VAL
#else
CALL ABORT_TRANS("Should not be here: MPI is disabled")
CALL MPL_RECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)), &
& KSOURCE=NPRCIDS(IPROC), KTAG=MTAGLG, KMP_TYPE=JP_NON_BLOCKING_STANDARD, &
& KREQUEST=IREQUEST(IR))
IREQ(IR) = IREQUEST(IR)
#endif
ENDDO

!....Send loop.........................................................
DO INS=1,ISEND_COUNTS
IR=IR+1
ISEND=ISEND_TO_PROC(INS)
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT_MPI(ISEND), &
& TRGTOL_DTYPE,NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR)
IREQ(IR) = IREQUEST(IR)%MPI_VAL
#else
CALL ABORT_TRANS("Should not be here: MPI is disabled")
CALL MPL_SEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)), &
& KDEST=NPRCIDS(ISEND), KTAG=MTAGLG, KMP_TYPE=JP_NON_BLOCKING_STANDARD, &
& KREQUEST=IREQUEST(IR))
IREQ(IR) = IREQUEST(IR)
#endif
ENDDO

Expand Down
26 changes: 17 additions & 9 deletions src/trans/gpu/internal/trltog_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,

USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPIB
USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK
USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT
USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT, MPL_RECV, MPL_SEND
USE TPM_GEN, ONLY: LSYNC_TRANS, NERR, LMPOFF
USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS
USE TPM_DISTR, ONLY: D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV
USE PE2SET_MOD, ONLY: PE2SET
USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML
USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML, JP_NON_BLOCKING_STANDARD
USE OML_MOD, ONLY: OML_MY_THREAD
USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_REAL4, MPI_REAL8
! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157)
#endif
Expand Down Expand Up @@ -192,17 +192,19 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins...
INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT

#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
TYPE(MPI_COMM) :: LOCAL_COMM
TYPE(MPI_REQUEST) :: IREQUEST(NPROC*2)
#else
INTEGER(KIND=JPIM) :: IREQUEST(NPROC*2)
#endif

#ifdef PARKINDTRANS_SINGLE
#define TRLTOG_DTYPE MPI_REAL4
#else
#define TRLTOG_DTYPE MPI_REAL8
#endif
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
IF(.NOT. LMPOFF) THEN
LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() )
ENDIF
Expand Down Expand Up @@ -782,28 +784,34 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO INR=1,IRECV_COUNTS
IR=IR+1
IRECV=IRECV_TO_PROC(INR)
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)), &
& IRECVTOT_MPI(IRECV), &
& TRLTOG_DTYPE,NPRCIDS(IRECV)-1, &
& MTAGLG, LOCAL_COMM, IREQUEST(IR), &
& IERROR )
IREQ(IR) = IREQUEST(IR)%MPI_VAL
#else
CALL ABORT_TRANS("Should not be here: MPI is disabled")
CALL MPL_RECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)), &
& KSOURCE=NPRCIDS(IRECV), KTAG=MTAGLG, KMP_TYPE=JP_NON_BLOCKING_STANDARD, &
& KREQUEST=IREQUEST(IR))
IREQ(IR) = IREQUEST(IR)
#endif
ENDDO

!...Send loop.........................................................
DO INS=1,ISEND_COUNTS
IR=IR+1
ISEND=ISEND_TO_PROC(INS)
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT_MPI(ISEND), &
& TRLTOG_DTYPE, NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR)
IREQ(IR) = IREQUEST(IR)%MPI_VAL
#else
CALL ABORT_TRANS("Should not be here: MPI is disabled")
CALL MPL_SEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)), &
& KDEST=NPRCIDS(ISEND), KTAG=MTAGLG, KMP_TYPE=JP_NON_BLOCKING_STANDARD, &
& KREQUEST=IREQUEST(IR))
IREQ(IR) = IREQUEST(IR)
#endif
ENDDO

Expand Down
14 changes: 8 additions & 6 deletions src/trans/gpu/internal/trltom_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS)

USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB
USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK
USE MPL_MODULE, ONLY: MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK
USE MPL_MODULE, ONLY: MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK, MPL_ALLTOALLV
USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYSETW
USE TPM_GEN, ONLY: LSYNC_TRANS, NERR, LMPOFF
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
USE MPI_F08, ONLY: MPI_COMM, MPI_REAL4, MPI_REAL8
! Missing: MPI_ALLTOALLV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157)
#endif
Expand All @@ -116,7 +116,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS)

TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
TYPE(TRLTOM_HANDLE), INTENT(IN) :: HTRLTOM
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
TYPE(MPI_COMM) :: LOCAL_COMM
#endif

Expand All @@ -126,7 +126,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS)
#define TRLTOM_DTYPE MPI_REAL8
#endif

#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
IF(.NOT. LMPOFF) THEN
LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM
ENDIF
Expand Down Expand Up @@ -203,12 +203,14 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS)
!$ACC UPDATE HOST(PFBUF_IN,PFBUF)
#endif
#endif
#if ECTRANS_HAVE_MPI
#ifdef USE_RAW_MPI
CALL MPI_ALLTOALLV(PFBUF_IN,ILENS,IOFFS,TRLTOM_DTYPE,&
& PFBUF,ILENR,IOFFR, TRLTOM_DTYPE, &
& LOCAL_COMM,IERROR)
#else
CALL ABORT_TRANS("Should not be here: MPI is disabled")
CALL MPL_ALLTOALLV(PSENDBUF=PFBUF_IN, KSENDCOUNTS=ILENS, PRECVBUF=PFBUF, KRECVCOUNTS=ILENR, &
& KSENDDISPL=IOFFS, KRECVDISPL=IOFFR, KCOMM=MPL_ALL_MS_COMM, &
& CDSTRING='TRLTOM:')
#endif
#ifdef USE_GPU_AWARE_MPI
#ifdef OMPGPU
Expand Down
Loading
Loading