diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fc4575b6..126e2244c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -161,14 +161,15 @@ ecbuild_add_option( FEATURE GPU_STATIC DEFAULT ${GPU_STATIC_DEFAULT} DESCRIPTION "Compile GPU library as static library") +# Note: ETRANS GPU does not support OpenMP yet or FFT graphs yet ecbuild_add_option( FEATURE ETRANS DEFAULT OFF + CONDITION NOT HAVE_GPU OR ( HAVE_ACC AND NOT HAVE_GPU_GRAPHS_FFT) DESCRIPTION "Include Limited-Area-Model Transforms" ) - - + ecbuild_add_option( FEATURE ECTRANS4PY DEFAULT OFF - CONDITION HAVE_ETRANS AND HAVE_DOUBLE_PRECISION + CONDITION HAVE_ETRANS AND HAVE_DOUBLE_PRECISION DESCRIPTION "Compile ectrans4py interface routines for python binding w/ ctypesForFortran" ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 706806cd8..ae19e50cd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,12 +8,12 @@ add_subdirectory( trans ) add_subdirectory( programs ) -if( HAVE_TRANSI ) - add_subdirectory(transi) -endif() if( HAVE_ETRANS ) add_subdirectory(etrans) endif() +if( HAVE_TRANSI ) + add_subdirectory(transi) +endif() if(HAVE_ECTRANS4PY) add_subdirectory(ectrans4py) endif() diff --git a/src/etrans/CMakeLists.txt b/src/etrans/CMakeLists.txt index 8eae7343f..62182dd91 100644 --- a/src/etrans/CMakeLists.txt +++ b/src/etrans/CMakeLists.txt @@ -1,5 +1,3 @@ - - # (C) Copyright 2020- ECMWF. # # This software is licensed under the terms of the Apache Licence Version 2.0 @@ -66,7 +64,7 @@ function(generate_backend_includes) get_filename_component(outfile_name_we ${file_i} NAME_WE) get_filename_component(outfile_ext ${file_i} EXT) get_filename_component(outfile_dir ${file_i} DIRECTORY) - if (${file_i} IN_LIST ectrans_common_includes) + if (${file_i} IN_LIST ectrans_lam_common_includes) configure_file(${_PAR_INCLUDE_DIRECTORY}/${file_i} ${destination}/${outfile_name}) else() set(outfile "${destination}/${outfile_name_we}_${backend}${outfile_ext}") @@ -94,61 +92,13 @@ function(generate_backend_includes) target_include_directories(${_PAR_TARGET} INTERFACE $) endfunction(generate_backend_includes) - - - - -# TODO: move precision-independent files to common -#add_subdirectory( common ) +add_subdirectory( common ) if( HAVE_CPU) add_subdirectory( cpu ) endif() # placeholder -#if( HAVE_GPU ) -# add_subdirectory( gpu ) -#endif() - - -if (FALSE) -# original cmake file for etrans; keeping it for reference, but should be cleaned later -message(FATAL_ERROR "Hold it right there!") - -# build list of sources to add to trans library -# (using CMAKE_CURRENT_SOURCE_DIR is necessary because sources are in a different directory than the target library (trans_${prec}) -ecbuild_list_add_pattern( LIST etrans_src - GLOB - ${CMAKE_CURRENT_SOURCE_DIR}/biper/internal/* - ${CMAKE_CURRENT_SOURCE_DIR}/biper/external/* - ${CMAKE_CURRENT_SOURCE_DIR}/etrans/internal/* - ${CMAKE_CURRENT_SOURCE_DIR}/etrans/external/* - QUIET - ) - -# dummies to be able to loop over precisions -set( HAVE_dp ${HAVE_DOUBLE_PRECISION} ) -set( HAVE_sp ${HAVE_SINGLE_PRECISION} ) - -# loop over precisions -foreach( prec sp dp ) - if( HAVE_${prec} ) - # add sources - target_sources(trans_${prec} PRIVATE ${etrans_src}) - # add include directories - target_include_directories(trans_${prec} - PUBLIC - $ - $ - ) - endif() -endforeach() - -# install headers -file( GLOB etrans_interface biper/include/* etrans/include/*) -install( - FILES ${etrans_interface} - DESTINATION include/ectrans -) - -endif() \ No newline at end of file +if( HAVE_GPU ) + add_subdirectory( gpu ) +endif() diff --git a/src/etrans/common/CMakeLists.txt b/src/etrans/common/CMakeLists.txt new file mode 100644 index 000000000..293746a98 --- /dev/null +++ b/src/etrans/common/CMakeLists.txt @@ -0,0 +1,44 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +## Sources which are precision independent can go into a common library +list( APPEND ectrans_lam_common_src + internal/tpmald_distr.F90 + internal/tpmald_dim.F90 + internal/esetup_dims_mod.F90 + internal/esetup_geom_mod.F90 + internal/suemplat_mod.F90 + internal/suemplatb_mod.F90 + internal/ellips.F90 +) +list( APPEND ectrans_lam_common_includes +) + +ecbuild_add_library( + TARGET ectrans_lam_common + LINKER_LANGUAGE Fortran + SOURCES ${ectrans_lam_common_src} + PUBLIC_LIBS fiat ectrans_common + PRIVATE_LIBS ${LAPACK_LIBRARIES} + PUBLIC_INCLUDES $ + $ + $ + $ +) +ectrans_target_fortran_module_directory( + TARGET ectrans_lam_common + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans +) + +if( HAVE_OMP ) + ecbuild_debug("target_link_libraries( ectrans_common PRIVATE OpenMP::OpenMP_Fortran )") + target_link_libraries( ectrans_lam_common PRIVATE OpenMP::OpenMP_Fortran ) +endif() + +set( ectrans_lam_common_includes ${ectrans_lam_common_includes} PARENT_SCOPE ) diff --git a/src/etrans/cpu/internal/ellips.F90 b/src/etrans/common/internal/ellips.F90 similarity index 98% rename from src/etrans/cpu/internal/ellips.F90 rename to src/etrans/common/internal/ellips.F90 index 55682502d..76daf2d30 100644 --- a/src/etrans/cpu/internal/ellips.F90 +++ b/src/etrans/common/internal/ellips.F90 @@ -11,7 +11,7 @@ ! Jan-2011 P. Marguinaud Interface to thread-safe FA SUBROUTINE ELLIPS (KSMAX,KMSMAX,KNTMP,KMTMP) -USE PARKIND1, ONLY : JPRD, JPIM +USE EC_PARKIND, ONLY : JPRD, JPIM USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK IMPLICIT NONE ! diff --git a/src/etrans/cpu/internal/esetup_dims_mod.F90 b/src/etrans/common/internal/esetup_dims_mod.F90 similarity index 97% rename from src/etrans/cpu/internal/esetup_dims_mod.F90 rename to src/etrans/common/internal/esetup_dims_mod.F90 index b5b1a2271..69271ad99 100644 --- a/src/etrans/cpu/internal/esetup_dims_mod.F90 +++ b/src/etrans/common/internal/esetup_dims_mod.F90 @@ -13,7 +13,7 @@ MODULE ESETUP_DIMS_MOD CONTAINS SUBROUTINE ESETUP_DIMS -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND ,ONLY : JPIM USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE TPM_DIM ,ONLY : R diff --git a/src/etrans/cpu/internal/esetup_geom_mod.F90 b/src/etrans/common/internal/esetup_geom_mod.F90 similarity index 98% rename from src/etrans/cpu/internal/esetup_geom_mod.F90 rename to src/etrans/common/internal/esetup_geom_mod.F90 index e61f9b6b9..a619e8772 100644 --- a/src/etrans/cpu/internal/esetup_geom_mod.F90 +++ b/src/etrans/common/internal/esetup_geom_mod.F90 @@ -13,7 +13,7 @@ MODULE ESETUP_GEOM_MOD CONTAINS SUBROUTINE ESETUP_GEOM -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND ,ONLY : JPIM USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE TPM_GEN ,ONLY : NOUT, NPRINTLEV diff --git a/src/etrans/cpu/internal/suemplat_mod.F90 b/src/etrans/common/internal/suemplat_mod.F90 similarity index 99% rename from src/etrans/cpu/internal/suemplat_mod.F90 rename to src/etrans/common/internal/suemplat_mod.F90 index 7f1c1393e..53b941d2c 100644 --- a/src/etrans/cpu/internal/suemplat_mod.F90 +++ b/src/etrans/common/internal/suemplat_mod.F90 @@ -93,7 +93,7 @@ SUBROUTINE SUEMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& ! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE EC_PARKIND ,ONLY : JPIM ,JPRD USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE TPM_GEN ,ONLY : NOUT, NPRINTLEV diff --git a/src/etrans/cpu/internal/suemplatb_mod.F90 b/src/etrans/common/internal/suemplatb_mod.F90 similarity index 99% rename from src/etrans/cpu/internal/suemplatb_mod.F90 rename to src/etrans/common/internal/suemplatb_mod.F90 index 66c275599..3717c5ceb 100644 --- a/src/etrans/cpu/internal/suemplatb_mod.F90 +++ b/src/etrans/common/internal/suemplatb_mod.F90 @@ -74,7 +74,7 @@ SUBROUTINE SUEMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,& ! A.Bogatchev 21-Sep-2010 phasing CY37 ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE EC_PARKIND ,ONLY : JPIM, JPRD USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS diff --git a/src/etrans/cpu/internal/tpmald_dim.F90 b/src/etrans/common/internal/tpmald_dim.F90 similarity index 74% rename from src/etrans/cpu/internal/tpmald_dim.F90 rename to src/etrans/common/internal/tpmald_dim.F90 index 188f6ebc6..409812662 100644 --- a/src/etrans/cpu/internal/tpmald_dim.F90 +++ b/src/etrans/common/internal/tpmald_dim.F90 @@ -13,7 +13,7 @@ MODULE TPMALD_DIM ! Module for dimensions. -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND ,ONLY : JPIM, JPIB IMPLICIT NONE @@ -26,6 +26,13 @@ MODULE TPMALD_DIM INTEGER(KIND=JPIM) :: NDGLSUR ! Number of rows of latitudes+... INTEGER(KIND=JPIM) :: NMSMAX ! Zonal truncation INTEGER(KIND=JPIM) :: NDGUX ! Number of rows in zone C+I + +! arguments to pass to EXECUTE_FFT: kept here to make sure their addresses are constant (necessary for cuda graphs) +INTEGER(KIND=JPIM) :: NLOENS_LON(1) +INTEGER(KIND=JPIB) :: NOFFSETS_LON(2) +INTEGER(KIND=JPIM) :: NLOENS_LAT(1) +INTEGER(KIND=JPIB) :: NOFFSETS_LAT(2) + END TYPE ALDDIM_TYPE TYPE(ALDDIM_TYPE),ALLOCATABLE,TARGET :: ALDDIM_RESOL(:) diff --git a/src/etrans/cpu/internal/tpmald_distr.F90 b/src/etrans/common/internal/tpmald_distr.F90 similarity index 96% rename from src/etrans/cpu/internal/tpmald_distr.F90 rename to src/etrans/common/internal/tpmald_distr.F90 index 2d9cc0a79..76ef74762 100644 --- a/src/etrans/cpu/internal/tpmald_distr.F90 +++ b/src/etrans/common/internal/tpmald_distr.F90 @@ -13,7 +13,7 @@ MODULE TPMALD_DISTR ! Module for distributed memory environment. -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND ,ONLY : JPIM IMPLICIT NONE diff --git a/src/etrans/cpu/CMakeLists.txt b/src/etrans/cpu/CMakeLists.txt index 3bd6ad8cf..ea9bcbc8a 100644 --- a/src/etrans/cpu/CMakeLists.txt +++ b/src/etrans/cpu/CMakeLists.txt @@ -27,7 +27,7 @@ function(generate_backend_sources) internal/*.F90 external/*.F90 biper/internal/*.F90 - biper/external/*.F90 + biper/external/*.F90 QUIET ) @@ -50,20 +50,20 @@ set( BUILD_INTERFACE_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/ectrans ) foreach( prec dp sp ) if( HAVE_${prec} ) - generate_backend_includes(BACKEND ${prec} TARGET ectrans_etrans_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/etrans/include ) - generate_backend_sources( BACKEND ${prec} OUTPUT ectrans_etrans_${prec}_src DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/generated/ectrans_etrans_${prec}) + generate_backend_includes(BACKEND ${prec} TARGET ectrans_lam_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/etrans/include ) + generate_backend_sources( BACKEND ${prec} OUTPUT ectrans_lam_${prec}_src DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/generated/ectrans_lam_${prec}) ecbuild_add_library( - TARGET ectrans_etrans_${prec} + TARGET ectrans_lam_${prec} LINKER_LANGUAGE Fortran - SOURCES ${ectrans_etrans_${prec}_src} + SOURCES ${ectrans_lam_${prec}_src} PUBLIC_INCLUDES $ $ $ - PUBLIC_LIBS fiat ectrans_common ectrans_${prec}_includes ectrans_${prec} ectrans_etrans_${prec}_includes + PUBLIC_LIBS fiat ectrans_common ectrans_${prec}_includes ectrans_${prec} ectrans_lam_common ectrans_lam_${prec}_includes ) ectrans_target_fortran_module_directory( - TARGET ectrans_etrans_${prec} + TARGET ectrans_lam_${prec} MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans INSTALL_DIRECTORY module/ectrans ) @@ -75,23 +75,23 @@ foreach( prec dp sp ) set( FFTW_LINK PUBLIC ) # Attempt anyway to give FFTW precedence endif() ecbuild_debug("target_link_libraries( trans_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} )") - target_link_libraries( ectrans_etrans_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} ) - target_include_directories( ectrans_etrans_${prec} PRIVATE ${FFTW_INCLUDE_DIRS} ) - target_compile_definitions( ectrans_etrans_${prec} PRIVATE WITH_FFTW ) + target_link_libraries( ectrans_lam_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} ) + target_include_directories( ectrans_lam_${prec} PRIVATE ${FFTW_INCLUDE_DIRS} ) + target_compile_definitions( ectrans_lam_${prec} PRIVATE WITH_FFTW ) # daand: lam transforms don't need lapack #ecbuild_debug("target_link_libraries( ectrans_etrans_${prec} PRIVATE ${LAPACK_LIBRARIES} )") #target_link_libraries( ectrans_${prec} PRIVATE ${LAPACK_LIBRARIES} ) if( HAVE_OMP ) ecbuild_debug("target_link_libraries( ectrans_${prec} PRIVATE OpenMP::OpenMP_Fortran )") - target_link_libraries( ectrans_${prec} PRIVATE OpenMP::OpenMP_Fortran ) + target_link_libraries( ectrans_lam_${prec} PRIVATE OpenMP::OpenMP_Fortran ) endif() # This interface library is for backward compatibility, and provides the older includes ecbuild_add_library( TARGET etrans_${prec} TYPE INTERFACE ) target_include_directories( etrans_${prec} INTERFACE $ ) target_include_directories( etrans_${prec} INTERFACE $ ) - target_link_libraries( trans_${prec} INTERFACE fiat etrans_${prec} ectrans_etrans_${prec} parkind_${prec}) + target_link_libraries( etrans_${prec} INTERFACE ectrans_lam_${prec}) # seems ok to omit fiat, parkind, trans here. endif() endforeach() diff --git a/src/etrans/cpu/internal/edist_spec_control_mod.F90 b/src/etrans/cpu/internal/edist_spec_control_mod.F90 deleted file mode 100644 index 23ae29d7c..000000000 --- a/src/etrans/cpu/internal/edist_spec_control_mod.F90 +++ /dev/null @@ -1,14 +0,0 @@ -! (C) Copyright 2001- ECMWF. -! (C) Copyright 2001- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - - -MODULE EDIST_SPEC_CONTROL_MOD - ! dead code - merged with DIST_SPEC_CONTROL_MOD -END MODULE EDIST_SPEC_CONTROL_MOD diff --git a/src/etrans/cpu/internal/espnormc_mod.F90 b/src/etrans/cpu/internal/espnormc_mod.F90 deleted file mode 100644 index f802ac553..000000000 --- a/src/etrans/cpu/internal/espnormc_mod.F90 +++ /dev/null @@ -1,14 +0,0 @@ -! (C) Copyright 2001- ECMWF. -! (C) Copyright 2001- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - - -MODULE ESPNORMC_MOD - ! dead code -END MODULE ESPNORMC_MOD diff --git a/src/etrans/gpu/CMakeLists.txt b/src/etrans/gpu/CMakeLists.txt new file mode 100644 index 000000000..5a196e065 --- /dev/null +++ b/src/etrans/gpu/CMakeLists.txt @@ -0,0 +1,133 @@ +set( GPU_LIBRARY_TYPE SHARED ) +if( HAVE_GPU_STATIC ) + set( GPU_LIBRARY_TYPE STATIC ) +endif() + +function(generate_backend_sources) + set (options) + set (oneValueArgs BACKEND DESTINATION OUTPUT) + set (multiValueArgs) + + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(backend ${_PAR_BACKEND}) + set(destination ${_PAR_DESTINATION}) + file(MAKE_DIRECTORY ${destination}/internal) + file(MAKE_DIRECTORY ${destination}/external) + file(MAKE_DIRECTORY ${destination}/biper/external) + file(MAKE_DIRECTORY ${destination}/biper/internal) + + ecbuild_list_add_pattern( LIST files + GLOB + internal/*.F90 + external/*.F90 + biper/internal/*.F90 + biper/external/*.F90 + QUIET + ) + + set(outfiles) + foreach(file_i ${files}) + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + set(outfile "${destination}/${file_i}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) + endforeach(file_i) + set(${_PAR_OUTPUT} ${outfiles} PARENT_SCOPE) +endfunction(generate_backend_sources) + + +set( BUILD_INTERFACE_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/ectrans ) + +foreach( prec dp sp ) + if( HAVE_${prec} ) + + set(GENERATED_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/ectrans_lam_gpu_${prec}) + + generate_backend_includes(BACKEND gpu_${prec} TARGET ectrans_lam_gpu_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/etrans/include ) + generate_backend_sources( BACKEND gpu_${prec} OUTPUT ectrans_lam_gpu_${prec}_src DESTINATION ${GENERATED_SOURCE_DIR}) + + + # set custom compilation flags here: keeping as placeholder + #if( NOT ${CMAKE_BUILD_TYPE_CAPS} STREQUAL DEBUG ) + #set_source_files_properties( ${GENERATED_SOURCE_DIR}/internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + #ecbuild_info("warn: special compile flags ftinv_mod.F90") + #set_source_files_properties( ${GENERATED_SOURCE_DIR}/internal/ftdir_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + #ecbuild_info("warn: special compile flags ftdir_mod.F90") + #endif() + + ecbuild_add_library( + TARGET ectrans_lam_gpu_${prec} + TYPE ${GPU_LIBRARY_TYPE} + SOURCES ${ectrans_lam_gpu_${prec}_src} + LINKER_LANGUAGE Fortran + PUBLIC_INCLUDES $ + $ + $ + $ + PUBLIC_LIBS fiat ectrans_common ectrans_gpu_common ectrans_gpu_${prec} ectrans_gpu_${prec}_includes ectrans_lam_common ectrans_lam_gpu_${prec}_includes + PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> + $<${HAVE_MPI}:MPI::MPI_Fortran> + PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU + #$<${HAVE_CUTLASS}:USE_CUTLASS> # not relevant for LAM + #$<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> # not relevant for LAM + #$<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> # not relevant for LAM + $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> + #$<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> # not relevant for LAM + #ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} # not relevant for LAM + ) + + ectrans_target_fortran_module_directory( + TARGET ectrans_lam_gpu_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans + ) + + if( prec STREQUAL sp ) + target_compile_definitions( ectrans_lam_gpu_${prec} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) + endif() + + # cuFFT can do in-place FFT, hipFFT cannot + if( HAVE_CUDA ) + target_compile_definitions( ectrans_lam_gpu_${prec} PRIVATE IN_PLACE_FFT ) + endif() + + if( HAVE_OMP AND CMAKE_Fortran_COMPILER_ID MATCHES Cray ) + # Propagate flags as link options for downstream targets. Only required for Cray + target_link_options( ectrans_lam_gpu_${prec} INTERFACE + $<$:SHELL:${OpenMP_Fortran_FLAGS}> + $<$:SHELL:${OpenMP_Fortran_FLAGS}> + $<$:SHELL:${OpenMP_Fortran_FLAGS}> ) + endif() + + if( HAVE_ACC AND CMAKE_Fortran_COMPILER_ID MATCHES NVHPC ) + # Propagate flags as link options for downstream targets. Only required for NVHPC + target_link_options( ectrans_lam_gpu_${prec} INTERFACE + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> ) + endif() + + # This interface library is for backward compatibility, and provides the older includes + ecbuild_add_library( TARGET etrans_gpu_${prec} TYPE INTERFACE ) + target_include_directories( etrans_gpu_${prec} INTERFACE $ ) + target_include_directories( etrans_gpu_${prec} INTERFACE $ ) + target_link_libraries( etrans_gpu_${prec} INTERFACE ectrans_lam_gpu_${prec}) + + + # ## Install trans_gpu_${prec} interface + # file( GLOB trans_interface ${PROJECT_SOURCE_DIR}/src/trans/include/ectrans/* ) + # install( + # FILES ${trans_interface} + # DESTINATION include/ectrans/trans_gpu_${prec} + # ) + endif() +endforeach() + +## Install etrans interface +install( DIRECTORY ${BUILD_INTERFACE_INCLUDE_DIR}/ DESTINATION include/ectrans ) diff --git a/src/etrans/gpu/biper/external/etibihie.F90 b/src/etrans/gpu/biper/external/etibihie.F90 new file mode 100644 index 000000000..033adf85d --- /dev/null +++ b/src/etrans/gpu/biper/external/etibihie.F90 @@ -0,0 +1,101 @@ +SUBROUTINE ETIBIHIE(KDLON,KDGL,KNUBI,KDLUX,KDGUX,& + & KSTART,KDLSM,PGPBI,LDBIX,LDBIY,KDADD) + +!**** tool ETIBIHIE : Doubly-periodicisation : isotropic spline +! ------------- method. + +! purpose : +! -------- +! KNUBI horizontal fields which are known on C U I, +! are extended over E, in order to obtain doubly-periodic +! fields. +! IF LDBIX is equal .TRUE. , then the fields are periodicise +! in the x ( or longitude ) direction. If it is not the case, +! KDLUX must be equal to KDLON. +! IF LDBIY is equal .TRUE. , then the fields are periodicise +! in the y ( or latitude ) direction. If it is not the case, +! KDGUX must be equal to KDGL. + +!* *CALL* *ETIBIHIE*(...) + +! externals : +! ---------- +! ESPLIN spline extension +! ESMOOTH smoothing across to get isotropy. + +! explicit arguments : +! ------------------ +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KNUBI : number of horizontal fields to doubly-periodicise. +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KSTART : first dimension in x direction of g-p array +! KDLSM : second dimension in x direction of g-p array +! PGPBI : gridpoint array on C U I U E. +! LDBIX : logical to periodicize or not +! in the x ( or longitude ) direction. +! LDBIY : logical to periodicize or not +! in the y ( or latitude ) direction. +! KDADD : 1 to test biperiodiz. + +! references : +! ---------- + +! author : +! ------ +! V. Ducrocq + +! modification : +! ------------ +! A. Stanesic 28/03/2008: KDADD - test of externalized biper. +! ------------------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE ESPLINE_MOD +USE ESMOOTHE_MOD + +! ------------------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB),INTENT(INOUT) :: PGPBI(KSTART:KDLSM+KDADD,KNUBI,1:KDGL+KDADD) +LOGICAL,INTENT(IN) :: LDBIX +LOGICAL,INTENT(IN) :: LDBIY + +! ------------------------------------------------------------------------- + +REAL(KIND=JPRB) :: ZALFA +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ETIBIHIE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------------- + +!* 1. DOUBLY-PERIODICISE : +! ------------------ + +ZALFA = 0.0_JPRB + +CALL ESPLINE(1,KDLON,1,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM+KDADD,1,KDGL+KDADD,KNUBI,PGPBI,ZALFA,LDBIX,LDBIY,KDADD) +CALL ESMOOTHE(1,KDLON,1,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM+KDADD,1,KDGL+KDADD,KNUBI,PGPBI,LDBIX,LDBIY) + +! ------------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ETIBIHIE',1,ZHOOK_HANDLE) +END SUBROUTINE ETIBIHIE diff --git a/src/etrans/gpu/biper/external/fpbipere.F90 b/src/etrans/gpu/biper/external/fpbipere.F90 new file mode 100644 index 000000000..2b75ca620 --- /dev/null +++ b/src/etrans/gpu/biper/external/fpbipere.F90 @@ -0,0 +1,154 @@ +SUBROUTINE FPBIPERE(KDLUX,KDGUX,KDLON,KDGL,KNUBI,KD1,PGPBI,KDADD,LDZON, & + & LDBOYD, KDBOYD, PLBOYD) + +!**** *FPBIPERE* - Full-POS interface for double periodicisation + +! purpose : +! -------- +! To bi-periodicise the post-processed fields, or just fill the extension zone +! with the mean value of C+I area + +!** INTERFACE. +! ---------- +! *CALL* *FPBIPERE*(...) + +! EXPLICIT ARGUMENTS +! -------------------- +! KDLUX : upper bound for the x (or longitude) dimension of C U I. +! KDGUX : upper bound for the y (or latitude) dimension of C U I. +! KDLON : upper bound for the x (or longitude) dimension of the gridpoint array on C U I U E +! KDGL : upper bound for the y (or latitude) dimension of the gridpoint array on C U I U E +! KNUBI : number of horizontal fields to doubly-periodicise. +! KD1 : dimension of input/output array +! PGPBI : input/output gridpoint array on C U I U E. +! LDZON : .true. if input grid on C U I U E (.false. if C U I) +! KDADD : 1 to test biperiodiz. +! LDBOYD: perform boyd periodization (inside C U I) +! KDBOYD: array containing dimensions of boyd domain +! PLBOYD: scalar parameter for boyd (variable L in paper) + +! IMPLICIT ARGUMENTS +! -------------------- + +! METHOD. +! ------- +! SEE DOCUMENTATION + +! EXTERNALS. +! ---------- +! ESPLINE spline extension +! ESMOOTHE smoothing across to get isotropy. + +! REFERENCE. +! ---------- +! ECMWF Research Department documentation of the IFS + +! AUTHOR. +! ------- +! RYAD EL KHATIB *METEO-FRANCE* + +! MODIFICATIONS. +! -------------- +! R. El Khatib : 01-08-07 Pruning options +! M.Hamrud : 01-Oct-2003 CY28 Cleaning +! F. Taillefer : 04-10-21 Add LDZON +! A. Stanesic : 28-03-08: KDADD - test of externalized biper. +! D. Degrauwe : feb 2012 Boyd periodization +! R. El Khatib 27-Sep-2013 Boyd periodization in Fullpos-2 +! R. El Khatib 04-Aug-2016 new interface to ewindowe + cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE ESPLINE_MOD +USE ESMOOTHE_MOD +USE EWINDOWE_MOD +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KD1 +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PGPBI(KD1,KNUBI) +LOGICAL, OPTIONAL ,INTENT(IN) :: LDZON +LOGICAL ,INTENT(IN) ,OPTIONAL :: LDBOYD +INTEGER(KIND=JPIM),INTENT(IN) ,OPTIONAL :: KDBOYD(6) +REAL(KIND=JPRB) ,INTENT(IN) ,OPTIONAL :: PLBOYD + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB), ALLOCATABLE :: ZGPBI(:,:,:) +INTEGER(KIND=JPIM) :: IND, ISTAE, JGL, JLON, JNUBI, IBWX, IBWY +LOGICAL :: LLZON, LLBOYD +REAL(KIND=JPRB) :: ZALFA +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('FPBIPERE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +LLBOYD=.FALSE. +IF (PRESENT(LDBOYD)) LLBOYD=LDBOYD + + +!* 2. DOUBLY-PERIODICISE +! ------------------ + +IF (LLBOYD) THEN + IF (.NOT.PRESENT(KDBOYD)) CALL ABOR1('FPBIPERE: Boyd periodization requires KDBOYD argument') + IF (.NOT.PRESENT(PLBOYD)) CALL ABOR1('FPBIPERE: Boyd periodization requires PLBOYD argument') + IBWX=KDBOYD(3) + IBWY=KDBOYD(6) + CALL EWINDOWE(KDLON,KDLUX,IBWX,KDGL,KDGUX,IBWY,KNUBI,PGPBI,PLBOYD,.TRUE.,.TRUE.) +ELSE + LLZON=.FALSE. + IF(PRESENT(LDZON)) LLZON=LDZON + ALLOCATE(ZGPBI(KDLON+KDADD,KNUBI,KDGL+KDADD)) + IF(LLZON) THEN +! Copy C+I+E + IND=KDLON + ELSE +! Copy C+I + IND=KDLUX + ENDIF +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JNUBI,ISTAE,JGL,JLON) + DO JNUBI=1,KNUBI + ISTAE=0 + DO JGL=1,KDGUX + DO JLON=1,KDLUX + ZGPBI(JLON,JNUBI,JGL)=PGPBI(ISTAE+JLON,JNUBI) + ENDDO + ISTAE=ISTAE+IND + ENDDO + ENDDO +!$OMP END PARALLEL DO + ZALFA = 0.0_JPRB + CALL ESPLINE(1,KDLON,1,KDGL,KDLUX,KDGUX,1,KDLON+KDADD,1,KDGL+KDADD,KNUBI,ZGPBI,& + & ZALFA,.TRUE.,.TRUE.,KDADD) + CALL ESMOOTHE(1,KDLON,1,KDGL,KDLUX,KDGUX,1,KDLON+KDADD,1,KDGL+KDADD,KNUBI,ZGPBI,& + & .TRUE.,.TRUE.) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JNUBI,ISTAE,JGL,JLON) + DO JNUBI=1,KNUBI + ISTAE=0 + DO JGL=1,KDGL + DO JLON=1,KDLON + PGPBI(ISTAE+JLON,JNUBI)=ZGPBI(JLON,JNUBI,JGL) + ENDDO + ISTAE=ISTAE+KDLON + ENDDO + ENDDO +!$OMP END PARALLEL DO + DEALLOCATE(ZGPBI) +ENDIF + + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('FPBIPERE',1,ZHOOK_HANDLE) +END SUBROUTINE FPBIPERE diff --git a/src/etrans/gpu/biper/external/horiz_field.F90 b/src/etrans/gpu/biper/external/horiz_field.F90 new file mode 100644 index 000000000..0d66345c2 --- /dev/null +++ b/src/etrans/gpu/biper/external/horiz_field.F90 @@ -0,0 +1,66 @@ +SUBROUTINE HORIZ_FIELD(KX,KY,PHFIELD) + +! purpose : +! -------- +! To produce test horizontal field of temperature. + +! method : +! --------- +! Test horizontal input field is on horizontal grid size KXxKY points, and it +! represent's temperature. It is obtained form flollwing expression: +! PHFIELD(i,j)=280*(1+0.1*Sin[PPI*(i+0.5*IMAX)*(j+0.7*IMAX)/IMAX^2+1]) (Pierre Benard) + +! interface : +! --------- +! CALL HORIZ_FIELD(KX,KY,PHFIELD) + +! Explicit arguments : +! ------------------- +! KX - number of grid points in x +! KY - number of grid points in y +! PHFIELD - simulated 2D temperature horizontal field + +! externals : +! ---------- +! None. + +! references : +! ---------- + +! author : +! ------ +! 23-May-2008 Antonio Stanesic +! ---------------------------------------------------------------------- + +USE PARKIND1 , ONLY : JPIM ,JPRB +USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + +! ---------------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KX +INTEGER(KIND=JPIM), INTENT(IN) :: KY +REAL(KIND=JPRB), INTENT(OUT) :: PHFIELD(KX,KY) + +! ---------------------------------------------------------------------- + +REAL(KIND=JPRB), PARAMETER :: PPI=3.141592 +INTEGER(KIND=JPIM) :: JX,JY,IMAX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ---------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('HORIZ_FIELD',0,ZHOOK_HANDLE) +! ---------------------------------------------------------------------- + +IMAX=MAX(KX,KY) + +DO JY=1,KY + DO JX=1,KX + PHFIELD(JX,JY)=280*(1+0.1*SIN(PPI*(JX+0.5*IMAX)*(JY+0.7*IMAX)/(IMAX**2)+1)) + ENDDO +ENDDO + +! ---------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('HORIZ_FIELD',1,ZHOOK_HANDLE) +END SUBROUTINE HORIZ_FIELD diff --git a/src/etrans/gpu/biper/internal/esmoothe_mod.F90 b/src/etrans/gpu/biper/internal/esmoothe_mod.F90 new file mode 100644 index 000000000..4d65fe998 --- /dev/null +++ b/src/etrans/gpu/biper/internal/esmoothe_mod.F90 @@ -0,0 +1,171 @@ +MODULE ESMOOTHE_MOD +CONTAINS +SUBROUTINE ESMOOTHE(KDLUN,KDLON,KDGUN,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM,KDGSA,KDGEN,KNUBI,PWORK,LDBIX,LDBIY) + +! purpose : +! -------- +! To smooth the fields over the extension zone. + +!* *CALL* *ESMOOTHE*(...) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLUN : lower bound for the x (or longitude) dimension +! of the gridpoint array +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGUN : lower bound for the y (or latitude) dimension +! of the gridpoint array +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KDLSM : dimension in x direction of g-p array +! KDGSA : first dimension index in y of g-p array +! KDGEN : last dimension index in y of g-p array +! KSTART : first dimension index in x of g-p array +! KDLSM : last dimension index in x of g-p array +! KNUBI : number of levels to biperiodicise + +! PWORK : gridpoint array on C U I U E. + +! LDBIX : .TRUE.: biperiodicise in x direction (and vice versa) +! LDBIY : .TRUE.: biperiodicise in y direction (and vice versa) + +! references : +! ---------- + +! author : +! ------ +! Michal Batka and Radmila Bubnova ( B & B ) + +! modifications : +! ------------- +! R. El Khatib 03-05-05 Optimizations +! -------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +! -------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGEN +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(KSTART:KDLSM,KNUBI,KDGSA:KDGEN) +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY + +! -------------------------------------------------------------- + +REAL(KIND=JPRB) :: ZPRAC(KDLUN-1:KDLON+1,KDGUN-1:KDGL+1) +INTEGER(KIND=JPIM) :: IEND, IENX1, IENX2, IENY1, IENY2, JFL, JLAT, JLL, JLON +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! -------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESMOOTHE',0,ZHOOK_HANDLE) +! -------------------------------------------------------------- + +!* 1. Calculation. +! ------------ + +IEND = MAX(KDLON-KDLUX,KDGL-KDGUX) +IEND = (IEND+1)/2 +IENX1= KDLON +IENX2= KDGL +IENY1= KDGL +IENY2= KDLON +IF(LDBIX.AND.(.NOT.LDBIY)) THEN + IENX2 = KDGUX + IENY1 = KDGUX +ELSEIF((.NOT.LDBIX).AND.LDBIY) THEN + IENX1 = KDLUX + IENY2 = KDLUX +ELSEIF((.NOT.LDBIX).AND.(.NOT.LDBIY)) THEN + IF (LHOOK) CALL DR_HOOK('ESMOOTHE',1,ZHOOK_HANDLE) + RETURN +ENDIF + +DO JFL = 1, KNUBI + + DO JLL = 1, IEND + + DO JLON = KDLUX,KDLON + DO JLAT = KDGUN,KDGL + ZPRAC(JLON,JLAT) = PWORK(JLON,JFL,JLAT) + ENDDO + ENDDO + + DO JLON = KDLUX,KDLON + ZPRAC(JLON,KDGUN-1) = PWORK(JLON,JFL,KDGL) + ZPRAC(JLON,KDGL +1) = PWORK(JLON,JFL,KDGUN) + ENDDO + DO JLAT = KDGUN,KDGL + ZPRAC(KDLON+1,JLAT) = PWORK(KDLUN,JFL,JLAT) + ENDDO + ZPRAC(KDLON+1,KDGUN-1) = PWORK(KDLUN,JFL,KDGL) + ZPRAC(KDLON+1,KDGL +1) = PWORK(KDLUN,JFL,KDGUN) + + DO JLON = KDLUX + JLL,IENX1 - JLL + 1 + DO JLAT = KDGUN, IENX2 + PWORK(JLON,JFL,JLAT)=(4._JPRB*ZPRAC(JLON,JLAT)+2.0_JPRB*(ZPRAC(JLON+& + & 1,JLAT)+& + & ZPRAC(JLON-1,JLAT) + ZPRAC(JLON,JLAT+1) +& + & ZPRAC(JLON,JLAT-1)) + ZPRAC(JLON+1,JLAT+1) +& + & ZPRAC(JLON-1,JLAT+1) + ZPRAC(JLON+1,JLAT-1)+& + & ZPRAC(JLON-1,JLAT-1))/16._JPRB + ENDDO + ENDDO + + DO JLAT = KDGUX,KDGL + DO JLON = KDLUN,KDLON + ZPRAC(JLON,JLAT) = PWORK(JLON,JFL,JLAT) + ENDDO + ENDDO + + DO JLAT = KDGUX,KDGL + ZPRAC(KDLUN-1,JLAT) = PWORK(KDLON,JFL,JLAT) + ZPRAC(KDLON+1,JLAT) = PWORK(KDLUN,JFL,JLAT) + ENDDO + DO JLON = KDLUN,KDLON + ZPRAC(JLON,KDGL +1) = PWORK(JLON,JFL,KDGUN) + ENDDO + ZPRAC(KDLUN-1,KDGL +1) = PWORK(KDLON,JFL,KDGUN) + ZPRAC(KDLON+1,KDGL +1) = PWORK(KDLUN,JFL,KDGUN) + + DO JLAT = KDGUX + JLL, IENY1 - JLL + 1 + DO JLON = KDLUN,IENY2 + PWORK(JLON,JFL,JLAT)=(4._JPRB*ZPRAC(JLON,JLAT)+2.0_JPRB*(ZPRAC(JLON+& + & 1,JLAT)+& + & ZPRAC(JLON-1,JLAT) + ZPRAC(JLON,JLAT+1) +& + & ZPRAC(JLON,JLAT-1)) + ZPRAC(JLON+1,JLAT+1) +& + & ZPRAC(JLON-1,JLAT+1) + ZPRAC(JLON+1,JLAT-1)+& + & ZPRAC(JLON-1,JLAT-1))/16._JPRB + ENDDO + ENDDO + + ENDDO + +ENDDO + +! -------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESMOOTHE',1,ZHOOK_HANDLE) +END SUBROUTINE ESMOOTHE +END MODULE ESMOOTHE_MOD diff --git a/src/etrans/gpu/biper/internal/espline_mod.F90 b/src/etrans/gpu/biper/internal/espline_mod.F90 new file mode 100644 index 000000000..e44880f19 --- /dev/null +++ b/src/etrans/gpu/biper/internal/espline_mod.F90 @@ -0,0 +1,189 @@ +MODULE ESPLINE_MOD +CONTAINS +SUBROUTINE ESPLINE(KDLUN,KDLON,KDGUN,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM,KDGSA,KDGEN,KNUBI,PWORK,PALFA,LDBIX,LDBIY,KDAD) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *ESPLINE*(...) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLUN : lower bound for the x (or longitude) dimension +! of the gridpoint array +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGUN : lower bound for the y (or latitude) dimension +! of the gridpoint array +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KSTART : first dimension in x direction of g-p array +! KDLSM : last dimension in x direction of g-p array +! KDGSA : first dimension in y of g-p array +! KDGEN : last dimension in y of g-p array +! KNUBI : number of levels to biperiodicise +! PWORK : gridpoint array on C U I U E. +! PALFA : boundary condition of a spline: +! = 0. ... natural spline +! = 1. ... boundary condition computed differentially +! (additional option) +! LDBIX : .TRUE. biperiodicisation in x ( and vice versa ) +! LDBIY : .TRUE. biperiodicisation in y ( and vice versa ) +! KDAD : 1 for test of biperiodic. + +! references : +! ---------- + +! author : +! ------ +! Michal Batka and Radmila Bubnova ( B & B ) + +! modifications : +! ------------- +! J.Vivoda 03-2002 2D model fix +! A. Stanesic : 28-03-08: KDADD - test of externalized biper. +! ------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +! ------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGEN +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(KSTART:KDLSM,KNUBI,KDGSA:KDGEN) +REAL(KIND=JPRB) ,INTENT(IN) :: PALFA +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY +INTEGER(KIND=JPIM),INTENT(IN) :: KDAD + +! ------------------------------------------------------------- + +LOGICAL :: LLBIX +LOGICAL :: LLBIY +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESPLINE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------- + +!* 1. Spline Extension. +! ------------------- + +LLBIX=LDBIX +LLBIY=LDBIY + +IF( KDLUN==1.AND.KDLUX==1 ) LLBIX=.FALSE. +IF( KDGUN==1.AND.KDGUX==1 ) LLBIY=.FALSE. + +IENDX = KDGUX +IENDY = KDLON + +IF(LLBIX.AND.(.NOT.LLBIY)) THEN + IENDY = KDLUN - 1 + +ELSEIF((.NOT.LLBIX).AND.LLBIY) THEN + IENDX = KDGUN - 1 + IENDY = KDLUX + +ELSEIF((.NOT.LLBIX).AND.(.NOT.LLBIY)) THEN + IF (LHOOK) CALL DR_HOOK('ESPLINE',1,ZHOOK_HANDLE) + RETURN +ENDIF +DO JFL = 1, KNUBI + + ZK = REAL(KDLON-KDLUX+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = PALFA/ZKP1 + + DO JLAT=KDGUN,IENDX + + ZEPSA = ((PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK -& + & PWORK(KDLUX,JFL,JLAT)+PWORK(KDLUX-1,JFL,JLAT))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(KDLUX,JFL,JLAT)-2.0_JPRB* PWORK(KDLUX-1,JFL,JLAT)+& + & PWORK(KDLUX-2,JFL,JLAT)) + + ZEPSB = (PWORK(KDLUN+1,JFL,JLAT)-PWORK(KDLUN,JFL,JLAT) -& + & (PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(KDLUN+2,JFL,JLAT)-2.0_JPRB* PWORK(KDLUN+1,JFL,JLAT)+& + & PWORK(KDLUN,JFL,JLAT)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(KDLUX,JFL,JLAT) + ZB = (PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + DO JLON=KDLUX+1,KDLON+KDAD + ZJ = REAL(JLON - KDLUX,JPRB) + PWORK(JLON,JFL,JLAT) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + ZK = REAL(KDGL - KDGUX + 1,JPRB) + ZKP1 = ZK + 1 + ZLAM = ZK/ZKP1 + ZNY = PALFA/ZKP1 + + DO JLON=KDLUN,IENDY+KDAD + + ZEPSA = ((PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK -& + & PWORK(JLON,JFL,KDGUX)+PWORK(JLON,JFL,KDGUX-1))*6._JPRB/ZKP1-& + & ZNY*(PWORK(JLON,JFL,KDGUX)-2.0_JPRB*PWORK(JLON,JFL,KDGUX-1)+& + & PWORK(JLON,JFL,KDGUX-2)) + + ZEPSB = (PWORK(JLON,JFL,KDGUN+1)-PWORK(JLON,JFL,KDGUN) -& + & (PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JLON,JFL,KDGUN+2)-2.0_JPRB*PWORK(JLON,JFL,KDGUN+1) +& + & PWORK(JLON,JFL,KDGUN)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ ZMM + ZA = PWORK(JLON,JFL,KDGUX) + ZB = (PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK - (2.0_JPRB*& + & ZM1 & + & + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + DO JLAT=KDGUX+1,KDGL+KDAD + ZJ = REAL(JLAT - KDGUX,JPRB) + PWORK(JLON,JFL,JLAT) = ZA +ZJ*(ZB +ZJ*(ZC + ZJ * ZD)) + ENDDO + ENDDO + +ENDDO + +! ------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESPLINE',1,ZHOOK_HANDLE) +END SUBROUTINE ESPLINE +END MODULE ESPLINE_MOD diff --git a/src/etrans/gpu/biper/internal/ewindowe_mod.F90 b/src/etrans/gpu/biper/internal/ewindowe_mod.F90 new file mode 100644 index 000000000..8d49a3379 --- /dev/null +++ b/src/etrans/gpu/biper/internal/ewindowe_mod.F90 @@ -0,0 +1,162 @@ +MODULE EWINDOWE_MOD + +CONTAINS + +SUBROUTINE EWINDOWE(KDLON,KDLUX,KBWX,KDGL,KDGUX,KBWY,KFLD,PGPIN,PSCAL,LDBIX,LDBIY) + +! purpose : +! -------- +! Make boyd periodic extension. + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLON : upper bound for the x (or longitude) dimension +! of C U I U P. +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U P +! PGPIN : gridpoint array on C U I U P (gp:fields). +! PSCAL : window function scaling parameter +! LDBIX : .TRUE. windowing in x direction ( and vice versa ) +! LDBIY : .TRUE. windowing in y direction ( and vice versa ) + + +! references : +! ---------- + +! author : Fabrice Voitus and Piet Termonia, 07/2009 +! ------ +! +! modification : +! Daan Degrauwe 02/2012 Cleaned and generalized +! S. Martinez 03/2012 Calls to ERF under CPP key __PGI +! (ERF function is not intrinsic with PGI) +! R. El Khatib 27-Sep-2013 implicit sized PGPIN +! R. El Khatib 04-Aug-2016 new interface +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KBWX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KBWY +INTEGER(KIND=JPIM),INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PGPIN((KDLUX+2*KBWX+2*(KDLON-KDLUX))*(KDGUX+2*KBWY+2*(KDGL-KDGUX)),KFLD) +REAL(KIND=JPRB) ,INTENT(IN) :: PSCAL +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY + +! FERF function +! ------------- + +#ifdef __PGI +REAL(KIND=JPRB), EXTERNAL :: ERF +#endif + +! scalars +! -------- + +INTEGER(KIND=JPIM) :: JFL, JGL, JLON, IOFF, IDLW, IDGW +INTEGER(KIND=JPIM) :: IWX, ILWX, IRWX, IWY, ILWY, IRWY, IBWXO, IBWYO +INTEGER(KIND=JPIM) :: ILATF, ILONF, IND1, IND, IOFF_LEFT,IOFF_RIGHT,IOFF_BOTTOM,IOFF_TOP +REAL(KIND=JPRB) :: ZI, ZJ, ZK, ZL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! local arrays : +! ------------ + +REAL(KIND=JPRB) :: ZBELX(2*KBWX+(KDLON-KDLUX)) +REAL(KIND=JPRB) :: ZBELY(2*KBWY+(KDGL -KDGUX)) + +!* 1. Boyd Bi-periodic Extension Method. +! --------------------------------- + +IF (LHOOK) CALL DR_HOOK('EWINDOWE',0,ZHOOK_HANDLE) + +IF ((.NOT.LDBIX).AND.(.NOT.LDBIY)) THEN + IF (LHOOK) CALL DR_HOOK('EWINDOWE',1,ZHOOK_HANDLE) + RETURN +ENDIF + +IDGW=SIZE(ZBELY) +IDLW=SIZE(ZBELX) + +! Bell window functions : +! --------------------- + +IF (LDBIX) THEN + DO JLON=1,IDLW + ! variable between -1 and 1 + ZJ=REAL(-IDLW-1+2*JLON,JPRB)/(IDLW+1) + ZL=ZJ/SQRT(1.0_JPRB-(ZJ*ZJ)) +#ifdef __PGI + ZBELX(JLON)=(1.0_JPRB+ERF(REAL(PSCAL*ZL)))/2.0_JPRB +#else + ZBELX(JLON)=(1.0_JPRB+ERF(PSCAL*ZL))/2.0_JPRB +#endif + ENDDO +ENDIF + +IF (LDBIY) THEN + DO JGL=1,IDGW + ! variable between -1 and 1 + ZJ=REAL(-IDGW-1+2*JGL,JPRB)/(IDGW+1) + ZL=ZJ/SQRT(1.0_JPRB-(ZJ*ZJ)) +#ifdef __PGI + ZBELY(JGL)=(1.0_JPRB+ERF(REAL(PSCAL*ZL)))/2.0_JPRB +#else + ZBELY(JGL)=(1.0_JPRB+ERF(PSCAL*ZL))/2.0_JPRB +#endif + ENDDO +ENDIF + + +! Windowing on P+G-zone : +! -------------------- + +IOFF=(KDLUX+2*(KBWX+KDGL-KDGUX)) +IBWXO=KBWX+(KDLON-KDLUX) +IBWYO=KBWY+(KDGL-KDGUX) + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFL,JGL,JLON,ILONF,ILATF,IND1,IND,IOFF_LEFT,IOFF_RIGHT,IOFF_BOTTOM,IOFF_TOP) +DO JFL=1,KFLD + IF (LDBIX) THEN + ! X-direction + DO JGL=1,KDGL+IDGW + IOFF_LEFT=(JGL-1)*IOFF + IOFF_RIGHT=IOFF_LEFT+KDLON + DO JLON=1,IDLW + PGPIN(IOFF_RIGHT+JLON,JFL) = ZBELX(JLON)*PGPIN(IOFF_LEFT+JLON,JFL) +& + & (1.0_JPRB-ZBELX(JLON))*PGPIN(IOFF_RIGHT+JLON,JFL) + ENDDO + ENDDO + ENDIF + IF (LDBIY) THEN + ! Y-direction + DO JGL=1,IDGW + IOFF_BOTTOM=(JGL-1)*IOFF + IOFF_TOP=(KDGL+JGL-1)*IOFF +!DIR$ IVDEP + DO JLON=1,KDLON+IDLW + PGPIN(IOFF_TOP+JLON,JFL) = ZBELY(JGL)*PGPIN(IOFF_BOTTOM+JLON,JFL) +& + & (1.0_JPRB-ZBELY(JGL))*PGPIN(IOFF_TOP+JLON,JFL) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('EWINDOWE',1,ZHOOK_HANDLE) + +END SUBROUTINE EWINDOWE + +END MODULE EWINDOWE_MOD diff --git a/src/etrans/gpu/biper/internal/extper_mod.F90 b/src/etrans/gpu/biper/internal/extper_mod.F90 new file mode 100644 index 000000000..8135d8048 --- /dev/null +++ b/src/etrans/gpu/biper/internal/extper_mod.F90 @@ -0,0 +1,144 @@ +MODULE EXTPER_MOD +CONTAINS +SUBROUTINE EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& + & KPOINTERS,KALFA) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& +! & KPOINTERS,KALFA) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! PWORK : Input: values in C U I area +! : Output: input+(spline extension on the E area) +! KDIM : Dimension of the C U I U E unit of work (one row or one m) +! KPSTA : Position where the unit of work starts +! KPOINTS : Position where the unit of work ends +! KFLDS : number of 2D fields +! KUNITS : Number of units of work +! KPOINTERS : Array of pointers for the units of work +! KALFA : boundary condition of a spline: +! = 0 ... natural spline +! = 1 ... boundary condition computed differentially +! (additional option) +! references : +! ---------- + +! author : +! ------ +! M. Hortal 03-11-2009 +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DISTR + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KDIM +INTEGER(KIND=JPIM),INTENT(IN) :: KPSTA +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTS +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDS +INTEGER(KIND=JPIM),INTENT(IN) :: KUNITS +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTERS(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KALFA + +! arrays : +! -------- +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA + +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPRB) :: ZMAX(KUNITS), ZMIN(KUNITS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EXTPER',0,ZHOOK_HANDLE) + +!* 0. Security +! -------- + +IF(UBOUND(PWORK,1) < KFLDS) THEN + CALL ABOR1(' EXTPER, PWORK first dimension too small') +ENDIF +IF(UBOUND(PWORK,2) < KDIM+2) THEN + WRITE(NOUT,*) ' UBOUND(PWORK,2)=',UBOUND(PWORK,2),' KDIM=',KDIM,' KUNITS=',& + &KUNITS + CALL ABOR1(' EXTPER, PWORK second dimension too small') +ENDIF +IF(UBOUND(KPOINTERS,1) < KUNITS) THEN + CALL ABOR1(' EXTPER, KPOINTERS too small') +ENDIF +IF(UBOUND(PWORK,2) < KPOINTERS(KUNITS)+KDIM) THEN + WRITE(NERR,*) ' EXTPER, KUNITS=',KUNITS,' KPOINTERS=',KPOINTERS(1:KUNITS),& + &' KDIM=',KDIM,' UBOUND(PWORK,2)=',UBOUND(PWORK,2) + CALL ABOR1(' EXTPER, value of KPOINTERS too large') +ENDIF + +!* 1. Spline Extension. +! ------------------- + +DO JFL = 1, KFLDS + + ZK = REAL(KDIM-KPOINTS+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = REAL(KALFA,JPRB)/ZKP1 + + DO JLAT=1,KUNITS + ZEPSA = & + &((PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK -& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-2)) + + ZEPSB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA) -& + & (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPSTA+2)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(JFL,KPOINTERS(JLAT)+KPOINTS) + ZB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + + DO JLON=KPOINTERS(JLAT)+KPOINTS+1,KPOINTERS(JLAT)+KDIM + + ZJ = REAL(JLON - (KPOINTERS(JLAT)+KPOINTS),JPRB) + PWORK(JFL,JLON) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + +ENDDO + +IF (LHOOK) CALL DR_HOOK('EXTPER',1,ZHOOK_HANDLE) +END SUBROUTINE EXTPER +END MODULE EXTPER_MOD diff --git a/src/etrans/gpu/external/edir_trans.F90 b/src/etrans/gpu/external/edir_trans.F90 new file mode 100644 index 000000000..7621b1aab --- /dev/null +++ b/src/etrans/gpu/external/edir_trans.F90 @@ -0,0 +1,503 @@ +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTL - control of Legendre transform +! EFTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 01-03-13 adaptation to aladin +! P. Smolikova 02-09-30 : AUX_PROC for d4 in NH +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 19-04-2013 Comparison of ubound(pspdiv,1) +! with ubound(pspvor,1) +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTL_MOD ,ONLY : EDIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1808,0) +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= UBOUND(PSPVOR,1)) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + +CALL EDIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANS diff --git a/src/etrans/gpu/external/edir_transad.F90 b/src/etrans/gpu/external/edir_transad.F90 new file mode 100644 index 000000000..558c3f259 --- /dev/null +++ b/src/etrans/gpu/external/edir_transad.F90 @@ -0,0 +1,511 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +!USE EDIR_TRANS_CTLAD_MOD ,ONLY : EDIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL ABORT_TRANS('Adjoint code of ectrans/lam not implemented on GPU yet') + +#ifdef UNDEF + +CALL GSTATS(1810,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL EDIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +! ------------------------------------------------------------------ +#endif + +!endif INTERFACE + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',1,ZHOOK_HANDLE) + +END SUBROUTINE EDIR_TRANSAD + diff --git a/src/etrans/gpu/external/edist_grid.F90 b/src/etrans/gpu/external/edist_grid.F90 new file mode 100644 index 000000000..135ee371c --- /dev/null +++ b/src/etrans/gpu/external/edist_grid.F90 @@ -0,0 +1,135 @@ +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'EDIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'EDIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('EDIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'EDIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('EDIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('EDIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('EDIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID + diff --git a/src/etrans/gpu/external/edist_spec.F90 b/src/etrans/gpu/external/edist_spec.F90 new file mode 100644 index 000000000..e89749025 --- /dev/null +++ b/src/etrans/gpu/external/edist_spec.F90 @@ -0,0 +1,196 @@ +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument (change the order of fields) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR ,ONLY : DALD + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM),INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J, IFLD, ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G, ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD=.TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD=LDIM1_IS_FLD +IF(LLDIM1_IS_FLD) THEN + IFLD=1 + ICOEFF=2 +ELSE + IFLD=2 + ICOEFF=1 +ENDIF + +ISMAX = RALD%NMSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +ALLOCATE(IALLMS(ISMAX+1)) +ALLOCATE(IKN(0:ISMAX)) +ISPEC2 = D%NSPEC2 +ISPEC2_G = R%NSPEC2_G +IPOSSP(:) = D%NPOSSP(:) +IDIM0G(:) = D%NDIM0G(:) +ISPEC2MX = D%NSPEC2MX +IUMPP(:) = D%NUMPP(:) +IALLMS(:) = D%NALLMS(:) +IPTRMS(:) = D%NPTRMS(:) +DO J=0,ISMAX + IKN(J)=2*DALD%NCPL2M(J) +ENDDO + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EDIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*)'EDIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('EDIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*)'EDIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EDIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EDIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('EDIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,KSORT) + + DEALLOCATE(IDIM0G) +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC + diff --git a/src/etrans/gpu/external/egath_grid.F90 b/src/etrans/gpu/external/egath_grid.F90 new file mode 100644 index 000000000..a53566a3d --- /dev/null +++ b/src/etrans/gpu/external/egath_grid.F90 @@ -0,0 +1,128 @@ +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM),INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID + diff --git a/src/etrans/gpu/external/egath_spec.F90 b/src/etrans/gpu/external/egath_spec.F90 new file mode 100644 index 000000000..0a7ea4ca1 --- /dev/null +++ b/src/etrans/gpu/external/egath_spec.F90 @@ -0,0 +1,204 @@ +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients + +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! R. El Khatib 23-Oct-2012 Monkey business +! P.Marguinaud 10-Oct-2013 Add an option to set (or not) first +! coefficients imaginary part to zero +! R. El Khatib 01-Dec-2020 Merge egath_spec_control and gath_spec_control +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_SPEC_CONTROL_MOD ,ONLY : GATH_SPEC_CONTROL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, IMSMAX, ISPEC2, ISPEC2_G,ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IMSMAX = RALD%NMSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +IF(PRESENT(KMSMAX)) IMSMAX = KMSMAX +ALLOCATE(IDIM0G(0:IMSMAX)) +ALLOCATE(IALLMS(IMSMAX+1)) +ALLOCATE(IKN(0:IMSMAX)) +IF(IMSMAX /= RALD%NMSMAX .OR. ISMAX /= R%NSMAX) THEN + CALL ABORT_TRANS('EGATH_SPEC:TRUNCATION CHANGE NOT YET CODED') +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) + ISPEC2MX = D%NSPEC2MX + IUMPP(:) = D%NUMPP(:) + IALLMS(:) = D%NALLMS(:) + IPTRMS(:) = D%NPTRMS(:) + DO J=0,IMSMAX + IKN(J)=2*DALD%NCPL2M(J) + ENDDO +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'EGATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('EGATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EGATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('EGATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EGATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EGATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & IMSMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,LDZA0IP) + +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC diff --git a/src/etrans/gpu/external/egpnorm_trans.F90 b/src/etrans/gpu/external/egpnorm_trans.F90 new file mode 100644 index 000000000..6a0e401db --- /dev/null +++ b/src/etrans/gpu/external/egpnorm_trans.F90 @@ -0,0 +1,90 @@ +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! R. El Khatib 16-Sep-2019 merge with global model code +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +CALL GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY) + +! note: weighting not taken into account by GPNORM_TRANS, so we do it here +PAVE=PAVE/G%NLOEN(1) + +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE EGPNORM_TRANS diff --git a/src/etrans/gpu/external/einv_trans.F90 b/src/etrans/gpu/external/einv_trans.F90 new file mode 100644 index 000000000..3b0c793bb --- /dev/null +++ b/src/etrans/gpu/external/einv_trans.F90 @@ -0,0 +1,604 @@ +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTL_MOD ,ONLY : EINV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1807,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +! Perform transform +CALL EINV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV ) + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANS + diff --git a/src/etrans/gpu/external/einv_transad.F90 b/src/etrans/gpu/external/einv_transad.F90 new file mode 100644 index 000000000..0d3f2dcda --- /dev/null +++ b/src/etrans/gpu/external/einv_transad.F90 @@ -0,0 +1,624 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: like in direct code: IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',0,ZHOOK_HANDLE) + +CALL ABORT_TRANS('Adjoint code of ectrans/lam not implemented on GPU yet') + +#ifdef UNDEF + +CALL GSTATS(1809,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'EINV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'EINV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('EINV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('EINV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL EINV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +#endif + + +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANSAD + diff --git a/src/etrans/gpu/external/esetup_trans.F90 b/src/etrans/gpu/external/esetup_trans.F90 new file mode 100644 index 000000000..d0b8d17e0 --- /dev/null +++ b/src/etrans/gpu/external/esetup_trans.F90 @@ -0,0 +1,351 @@ +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG, & + & LDUSEFFTW,LD_ALL_FFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs +! LD_ALL_FFTW : T to transform all fields in one call, F to transforms fields one after another + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 LENABLED +! R. El Khatib 01-Sep-2015 Support for FFTW +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPRINTLEV, MSETUP0, & + & NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL,NPROC,NPRTRV, MYPROC +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : FIELDS_RESOL +USE TPM_FLT ,ONLY : FLT_RESOL +USE TPM_CTL ,ONLY : CTL_RESOL + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FIELDS ,ONLY : ALDFIELDS_RESOL, FALD +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESETUP_DIMS_MOD ,ONLY : ESETUP_DIMS +USE SUEMP_TRANS_MOD ,ONLY : SUEMP_TRANS +USE SUEMP_TRANS_PRELEG_MOD ,ONLY : SUEMP_TRANS_PRELEG +USE ESETUP_GEOM_MOD ,ONLY : ESETUP_GEOM +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +#ifdef ACCGPU +USE OPENACC +#endif +#ifdef OMPGPU +USE OMP_LIB +#endif + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN) :: LD_ALL_FFTW + +!ifndef INTERFACE + +! Local variables +LOGICAL :: LLP1,LLP2 +#ifdef ACCGPU +INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE +#endif +INTEGER :: INUMDEVS, IUNIT, ISTAT, IDEV, MYGPU +INTEGER :: I, J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',0,ZHOOK_HANDLE) + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('ESETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE ESETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE ESETUP_TRANS ===' + +! Allocate resolution dependent structures common to global and LAM +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + NDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + ALLOCATE(CTL_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + NDEF_RESOL = NDEF_RESOL+1 + IF(NDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('ESETUP_TRANS:NDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF +! Allocate LAM-specific resolution dependent structures +IF(.NOT. ALLOCATED(ALDDIM_RESOL)) THEN + ALLOCATE(ALDDIM_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(ALDGEO_RESOL(NMAX_RESOL)) + ALLOCATE(ALDDISTR_RESOL(NMAX_RESOL)) +ENDIF + + +IF (PRESENT(KRESOL)) THEN + KRESOL=NDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL ESET_RESOL(NDEF_RESOL) +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + +! Defaults for optional arguments + +G%LREDUCED_GRID = .FALSE. +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +RALD%NMSMAX=KMSMAX +RALD%NDGUX=KDGUX +R%NDGL = KDGL +RALD%NDGLSUR=KDGL+2 +R%NDLON =KLOEN(1) + + +RALD%NLOENS_LON=(/ R%NDLON /) +RALD%NOFFSETS_LON=(/ 0 , R%NDLON+2 /) +RALD%NLOENS_LAT=(/ R%NDGL /) +RALD%NOFFSETS_LAT=(/ 0 , R%NDGL+2 /) + + +! IMPLICIT argument : +G%LAM = .TRUE. + +IF (KDGL <= 0) THEN + CALL ABORT_TRANS ('ESETUP_TRANS: KDGL IS NOT A POSITIVE NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF +IF(R%NTMAX /= R%NSMAX) THEN + !This SHOULD work but I don't know how to test it /MH + WRITE(NERR,*) 'R%NTMAX /= R%NSMAX',R%NTMAX,R%NSMAX + CALL ABORT_TRANS('ESETUP_TRANS:R%NTMAX /= R%NSMAX HAS NOT BEEN VALIDATED') +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF (PRESENT(KNOEXTZL)) THEN + R%NNOEXTZL=KNOEXTZL +ELSE + R%NNOEXTZL=0 +ENDIF + +IF (PRESENT(KNOEXTZG)) THEN + R%NNOEXTZG=KNOEXTZG +ELSE + R%NNOEXTZG=0 +ENDIF + +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL ESETUP_DIMS +IF (PRESENT(PEXWN)) GALD%EXWN=PEXWN +IF (PRESENT(PEYWN)) GALD%EYWN=PEYWN + + + +! GPU stuff: from setup_trans.F90 + +#ifdef ACCGPU +IDEVTYPE=ACC_GET_DEVICE_TYPE() +INUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) +MYGPU = MOD(MYPROC-1,INUMDEVS) +CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) +MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) +!ISTAT = CUDA_GETDEVICE(IDEV) +#endif + +#ifdef ACCGPU +!$ACC ENTER DATA COPYIN(D,R,G) +!$ACC ENTER DATA & +!$ACC& COPYIN(D%NUMP,D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,D%NPROCM,D%NPTRLS,D%MSTABF) & +!$ACC& COPYIN(R%NDGNH,R%NSMAX) & +!$ACC& COPYIN(G%NDGLU,G%NMEN,G%NLOEN) + +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(TO:F,D,D%NUMP,D%MYMS,R,R%NDGNH,R%NSMAX,G,G%NDGLU) +!$OMP TARGET ENTER DATA MAP(TO:D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,G%NMEN,D%NPROCM,D%NPTRLS,G,G%NLOEN,D%MSTABF) +#endif + +! First part of setup of distributed environment +CALL SUEMP_TRANS_PRELEG + +CALL GSTATS(1802,0) +! Compute arrays related to grid-point geometry +CALL ESETUP_GEOM +! Second part of setup of distributed environment +CALL SUEMP_TRANS +! Initialize Fast Fourier Transform package +CALL GSTATS(1802,1) + +! Signal the current resolution is active +LENABLED(NDEF_RESOL)=.TRUE. + + +#ifdef ACCGPU + WRITE(NOUT,*) 'Using OpenACC' +#endif +#ifdef OMPGPU + WRITE(NOUT,*) 'Using OpenMP offloading' +#endif + +#ifdef ACCGPU +!$ACC ENTER DATA COPYIN(R,D,G,DALD,FALD,RALD) +!$ACC ENTER DATA COPYIN(R%NSMAX,R%NTMAX,R%NDGL,R%NDGNH,D%NSTAGT0B,D%NSTAGT1B,& +!$ACC& D%NPNTGTB1,D%NPROCL,D%NUMP,D%NDGL_FS,D%MYMS,D%NASM0,D%NSTAGTF,D%MSTABF,& +!$ACC& D%NPNTGTB0,D%NPROCM,D%NPTRLS,G%NDGLU,G%NMEN,G%NLOEN,& +!$ACC& DALD%NESM0,DALD%NCPL2M, DALD%NPME, FALD%RLEPINM, & +!$ACC& RALD%NLOENS_LON,RALD%NOFFSETS_LON,RALD%NLOENS_LAT,RALD%NOFFSETS_LAT) +#endif +#ifdef OMPGPU +!$OMP TARGET ENTER DATA MAP(TO:R%NSMAX,R%NTMAX,R%NDGL,R%NDGNH,D%NSTAGT0B,D%NSTAGT1B) +!$OMP TARGET ENTER DATA MAP(TO:D%NPNTGTB1,D%NPROCL,D%NUMP,D%NDGL_FS,D%MYMS,D%NASM0,D%NSTAGTF,D%MSTABF) +!$OMP TARGET ENTER DATA MAP(TO:D%NPNTGTB0,D%NPROCM,D%NPTRLS,G%NDGLU,G%NMEN,G%NLOEN) +!$OMP TARGET ENTER DATA MAP(TO:DALD%NESM0,DALD%NCPL2M, DALD%NPME, FALD%RLEPINM) +!$OMP TARGET ENTER DATA MAP(TO:RALD%NLOENS_LON,RALD%NOFFSETS_LON,RALD%NLOENS_LAT,RALD%NOFFSETS_LAT +#endif + +WRITE(NOUT,*) '===GPU arrays successfully allocated' +#ifdef ACCGPU +!$ACC wait +#endif +#ifdef OMPGPU +!$OMP BARRIER +#endif + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!endif INTERFACE + +END SUBROUTINE ESETUP_TRANS + diff --git a/src/etrans/gpu/external/especnorm.F90 b/src/etrans/gpu/external/especnorm.F90 new file mode 100644 index 000000000..2bdb31290 --- /dev/null +++ b/src/etrans/gpu/external/especnorm.F90 @@ -0,0 +1,135 @@ +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESPNORM_CTL_MOD ,ONLY : ESPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KMASTER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PNORM(:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ESPECNORM',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'ESPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('ESPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + & NPRTRV,IFLD + CALL ABORT_TRANS('ESPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('ESPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL ESPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) +IF (LHOOK) CALL DR_HOOK('ESPECNORM',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM diff --git a/src/etrans/gpu/external/etrans_end.F90 b/src/etrans/gpu/external/etrans_end.F90 new file mode 100644 index 000000000..2435fd89b --- /dev/null +++ b/src/etrans/gpu/external/etrans_end.F90 @@ -0,0 +1,126 @@ +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Jul-2013 LENABLED +! R. El Khatib 01-Set-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_FLT ,ONLY : S, FLT_RESOL +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +CHARACTER*5 :: CLMODE +INTEGER(KIND=JPIM) :: JRES +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ETRANS_END',0,ZHOOK_HANDLE) +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + DO JRES=1,NDEF_RESOL + CALL EDEALLOC_RESOL(JRES) + ENDDO + NULLIFY(R) + IF (ALLOCATED(DIM_RESOL)) DEALLOCATE(DIM_RESOL) + NULLIFY(RALD) + IF (ALLOCATED(ALDDIM_RESOL)) DEALLOCATE(ALDDIM_RESOL) +!EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF +!TPM_DISTR + NULLIFY(D) + IF (ALLOCATED(DISTR_RESOL)) DEALLOCATE(DISTR_RESOL) + NULLIFY(DALD) + IF (ALLOCATED(ALDDISTR_RESOL)) DEALLOCATE(ALDDISTR_RESOL) +!TPM_FLT + NULLIFY(S) + IF (ALLOCATED(FLT_RESOL)) DEALLOCATE(FLT_RESOL) + +!TPM_FIELDS + NULLIFY(F) + IF (ALLOCATED(FIELDS_RESOL)) DEALLOCATE(FIELDS_RESOL) + NULLIFY(FALD) + IF (ALLOCATED(ALDFIELDS_RESOL)) DEALLOCATE(ALDFIELDS_RESOL) + +!TPM_GEOMETRY + NULLIFY(G) + IF(ALLOCATED(GEOM_RESOL)) DEALLOCATE(GEOM_RESOL) + NULLIFY(GALD) + IF(ALLOCATED(ALDGEO_RESOL)) DEALLOCATE(ALDGEO_RESOL) + + IF (ALLOCATED(LENABLED)) DEALLOCATE(LENABLED) + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF + +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF + !TPM_DISTR + IF (ALLOCATED(NPRCIDS)) DEALLOCATE(NPRCIDS) +ENDIF +IF (LHOOK) CALL DR_HOOK('ETRANS_END',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_END + diff --git a/src/etrans/gpu/external/etrans_inq.F90 b/src/etrans/gpu/external/etrans_inq.F90 new file mode 100644 index 000000000..1d580d60f --- /dev/null +++ b/src/etrans/gpu/external/etrans_inq.F90 @@ -0,0 +1,539 @@ +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M ,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resolution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension +! N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID +! T. Dalkilic 28-Aug-2012 KCPL4M +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPMALD_FIELDS + +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, IM, JJ, JM +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX),ICPLM(0:RALD%NMSMAX) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KESM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KESM0,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 TOO SMALL') + ELSE + KESM0(0:RALD%NMSMAX) = DALD%NESM0(:) + ENDIF +ENDIF + +IF(PRESENT(KCPL2M)) THEN + IF(UBOUND(KCPL2M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL2M TOO SMALL') + ELSE + KCPL2M(0:RALD%NMSMAX) = DALD%NCPL2M(:) + ENDIF +ENDIF +IF(PRESENT(KPROCM)) THEN + IF(UBOUND(KPROCM,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPROCM TOO SMALL') + ELSE + KPROCM(0:RALD%NMSMAX) = D%NPROCM(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < RALD%NMSMAX+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:RALD%NMSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:RALD%NMSMAX) = D%NDIM0G(0:RALD%NMSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('ETRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE + IU1 = MIN(IU1,R%NLEI3) + IU2 = MIN(IU2,D%NSPOLEGL) + PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(KMSMAX)) KMSMAX = RALD%NMSMAX +IF(PRESENT(PLEPINM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLEPINM,1) < R%NSPEC_G/2) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM TOO SMALL') + ELSEIF (LBOUND(PLEPINM,1) /= -1) THEN + CALL ABORT_TRANS('ETRANS_INQ: LOWER BOUND OF PLEPINM SHOULD BE -1') + ELSE + PLEPINM(:) = FALD%RLEPINM(:) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KNVALUE(IC+JJ)=JN + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KMVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KMVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KMVALUE(IC+JJ)=IM + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KCPL4M)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KCPL4M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + KCPL4M(JM) = 4*(ISNAX(JM)+1) + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(LDLINEAR_GRID)) THEN + LDLINEAR_GRID = R%NSMAX > (R%NDGL -1)/3 .OR. RALD%NMSMAX > (R%NDLON -1)/3 +ENDIF + + +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_INQ diff --git a/src/etrans/gpu/external/etrans_release.F90 b/src/etrans/gpu/external/etrans_release.F90 new file mode 100644 index 000000000..ea4f5a8a2 --- /dev/null +++ b/src/etrans/gpu/external/etrans_release.F90 @@ -0,0 +1,51 @@ +SUBROUTINE ETRANS_RELEASE(KRESOL) + +!**** *ETRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL ETRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL EDEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE ETRANS_RELEASE diff --git a/src/etrans/gpu/internal/cpl_int_mod.F90 b/src/etrans/gpu/internal/cpl_int_mod.F90 new file mode 100644 index 000000000..2b55a5b22 --- /dev/null +++ b/src/etrans/gpu/internal/cpl_int_mod.F90 @@ -0,0 +1,33 @@ +MODULE CPL_INT_MOD +CONTAINS +SUBROUTINE CPL_INT(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,CPL_PROC,KPTRGP) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE +INTEGER(KIND=JPIM), INTENT(IN) :: KENDROWL +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KFFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEN +INTEGER(KIND=JPIM), INTENT(IN) :: KSTA(KENDROWL) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB), INTENT(INOUT) :: PGTF(KFIELDS,KLEN) +EXTERNAL CPL_PROC + +INTEGER(KIND=JPIM) :: IPTRGP(KFIELDS) +INTEGER(KIND=JPIM) :: J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!-------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',0,ZHOOK_HANDLE) +IF(PRESENT(KPTRGP)) THEN + IPTRGP(:)=KPTRGP(1:KFIELDS) +ELSE + DO J=1,KFIELDS + IPTRGP(J)=J + ENDDO +ENDIF +CALL CPL_PROC(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,IPTRGP) +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',1,ZHOOK_HANDLE) +END SUBROUTINE CPL_INT +END MODULE CPL_INT_MOD diff --git a/src/etrans/gpu/internal/easre1b_mod.F90 b/src/etrans/gpu/internal/easre1b_mod.F90 new file mode 100644 index 000000000..79c009857 --- /dev/null +++ b/src/etrans/gpu/internal/easre1b_mod.F90 @@ -0,0 +1,93 @@ +MODULE EASRE1B_MOD +CONTAINS +SUBROUTINE EASRE1B(KFIELD,PFFT,FOUBUF_IN) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PFFT(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: FOUBUF_IN(:) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +INTEGER(KIND=JPIM) :: JM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + + +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',0,ZHOOK_HANDLE) + +!$acc parallel loop collapse(3) private (JM, JGL, JFLD, IPROC, IISTAN) & +!$acc& present (PFFT, D%NSTAGT0B, D%NPNTGTB1, D%NPROCL, D%NUMP, R%NDGL, FOUBUF_IN) & +!$acc& copyin(KFIELD) default(none) +DO JM = 1, D%NUMP !100 + DO JGL=1,R%NDGL !400 + DO JFLD =1,2*KFIELD !500 + IPROC=D%NPROCL(JGL) + IISTAN=(D%NPNTGTB1(JM,JGL))*2*KFIELD + FOUBUF_IN(IISTAN+JFLD)=PFFT(JGL,JM,JFLD) + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1B +END MODULE EASRE1B_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/edealloc_resol_mod.F90 b/src/etrans/gpu/internal/edealloc_resol_mod.F90 new file mode 100644 index 000000000..dc7fdd452 --- /dev/null +++ b/src/etrans/gpu/internal/edealloc_resol_mod.F90 @@ -0,0 +1,101 @@ +MODULE EDEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE EDEALLOC_RESOL(KRESOL) + +!**** *EDEALLOC_RESOL_MOD* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL EDEALLOC_RESOL_MOD + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from etrans_end +! B. Bochenek (Apr 2015): Phasing: update +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LENABLED, NOUT +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPM_DIM ,ONLY : R +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FIELDS ,ONLY : FALD +USE TPM_HICFFT, ONLY: CLEAN_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' EDEALLOC_RESOL WARNING: KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL ESET_RESOL(KRESOL) + +!$ACC EXIT DATA DELETE(R%NSMAX,R%NTMAX,R%NDGL,R%NDGNH,D%NSTAGT0B,D%NSTAGT1B,& +!$ACC& D%NPNTGTB1,D%NPROCL,D%NUMP,D%NDGL_FS,D%MYMS,D%NASM0,D%NSTAGTF,D%MSTABF,& +!$ACC& D%NPNTGTB0,D%NPROCM,D%NPTRLS,G%NDGLU,G%NMEN,G%NLOEN,& +!$ACC& DALD%NESM0,DALD%NCPL2M, DALD%NPME, FALD%RLEPINM, & +!$ACC& RALD%NLOENS_LON,RALD%NOFFSETS_LON,RALD%NLOENS_LAT,RALD%NOFFSETS_LAT) +!$ACC EXIT DATA DELETE(R,D,G,DALD,FALD,RALD) + + CALL CLEAN_FFT(KRESOL) + + !TPM_DISTR + DEALLOCATE(D%NFRSTLAT,D%NLSTLAT,D%NPTRLAT,D%NPTRFRSTLAT,D%NPTRLSTLAT) + DEALLOCATE(D%LSPLITLAT,D%NSTA,D%NONL,D%NGPTOTL,D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(.NOT.D%LGRIDONLY) THEN + + DEALLOCATE(D%MYMS,D%NUMPP,D%NPOSSP,D%NPROCM,D%NDIM0G,D%NASM0,D%NATM0) + DEALLOCATE(D%NLATLS,D%NLATLE,D%NPMT,D%NPMS,D%NPMG,D%NULTPP,D%NPROCL) + DEALLOCATE(D%NPTRLS,D%NALLMS,D%NPTRMS,D%NSTAGT0B,D%NSTAGT1B,D%NPNTGTB0) + DEALLOCATE(D%NPNTGTB1,D%NLTSFTB,D%NLTSGTB,D%MSTABF) + DEALLOCATE(D%NSTAGTF) + DEALLOCATE(G%NMEN,G%NDGLU) + + ELSE + + DEALLOCATE(G%NLOEN) + + ENDIF + + LENABLED(KRESOL)=.FALSE. + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EDEALLOC_RESOL +END MODULE EDEALLOC_RESOL_MOD diff --git a/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 b/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 new file mode 100644 index 000000000..205a2369b --- /dev/null +++ b/src/etrans/gpu/internal/edir_trans_ctl_mod.F90 @@ -0,0 +1,182 @@ +MODULE EDIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PMEANU,PMEANV - mean winds +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! G. Radnoti 01-03-13 adaptation to aladin +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : GROWING_ALLOCATION + + +USE ELTDIR_MOD +USE TRLTOM_PACK_UNPACK, ONLY : TRLTOM_PACK, TRLTOM_PACK_HANDLE, PREPARE_TRLTOM_PACK +USE TRLTOM_MOD +USE FTDIR_MOD +USE EFTDIR_MOD +USE TRGTOL_MOD +USE BUFFERED_ALLOCATOR_MOD + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables + +REAL(KIND=JPRB), POINTER :: FOUBUF(:), FOUBUF_IN(:), PREEL(:), PREEL_COMPLEX(:) +TYPE(BUFFERED_ALLOCATOR) :: ALLOCATOR +TYPE(FTDIR_HANDLE) :: HFTDIR +TYPE(ELTDIR_HANDLE) :: HELTDIR +TYPE(TRLTOM_HANDLE) :: HTRLTOM +TYPE(TRLTOM_PACK_HANDLE) :: HTRLTOM_PACK +TYPE(TRGTOL_HANDLE) :: HTRGTOL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',0,ZHOOK_HANDLE) +IF(NPROMATR > 0) THEN + print *, "This is currently not supported and/or tested (NPROMATR > 0)" + stop 24 +ENDIF + + +! Prepare everything +ALLOCATOR = MAKE_BUFFERED_ALLOCATOR() +HTRGTOL = PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) ! ZCOMBUFR, ZCOMBUFS and PREEL +IF (KF_FS > 0) THEN + HFTDIR = PREPARE_FTDIR(ALLOCATOR,KF_FS) ! PREEL_COMPLEX + HTRLTOM_PACK = PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) ! FOUBUF_IN + HTRLTOM = PREPARE_TRLTOM(ALLOCATOR, KF_FS) ! FOUBUF + HELTDIR = PREPARE_ELTDIR(ALLOCATOR, KF_FS, KF_UV) +ENDIF + +CALL INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) + +! from the PGP arrays to PREEL_REAL +CALL TRGTOL(ALLOCATOR,HTRGTOL,PREEL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +IF (KF_FS > 0) THEN + + ! fourier transform from PREEL_REAL to PREEL_COMPLEX (in-place!) + CALL GSTATS(1640,0) + CALL EFTDIR(ALLOCATOR,HFTDIR,PREEL,PREEL_COMPLEX,KF_FS,AUX_PROC=AUX_PROC) + CALL GSTATS(1640,1) + + CALL GSTATS(153,0) + CALL TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) ! formerly known as efourier_out + CALL TRLTOM(ALLOCATOR,HTRLTOM,FOUBUF_IN,FOUBUF,KF_FS) + CALL GSTATS(153,1) + + CALL ELTDIR(ALLOCATOR,HELTDIR,KF_FS,KF_UV,KF_SCALARS,FOUBUF, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2, & + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + +ENDIF + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTL +END MODULE EDIR_TRANS_CTL_MOD diff --git a/src/etrans/gpu/internal/efsc_mod.F90 b/src/etrans/gpu/internal/efsc_mod.F90 new file mode 100644 index 000000000..46d8dc0d0 --- /dev/null +++ b/src/etrans/gpu/internal/efsc_mod.F90 @@ -0,0 +1,192 @@ +MODULE EFSC_MOD +CONTAINS +SUBROUTINE EFSC(PREEL,KF_UV,KF_SCALARS,KF_SCDERS,KF_FS) +!SUBROUTINE EFSC(KF_UV,KF_SCALARS,KF_SCDERS,& +! & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER, LVORGP, LDIVGP +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +USE TPMALD_GEO ,ONLY : GALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) , INTENT(INOUT) :: PREEL(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_UV,KF_SCALARS,KF_SCDERS, KF_FS + +INTEGER(KIND=JPIM) :: JLOEN_MAX + +INTEGER(KIND=JPIM) :: JF,IGLG,JM,JGL +REAL(KIND=JPRB) :: ZIM +INTEGER(KIND=JPIM) :: I_UV_OFFSET, I_SC_OFFSET, I_SCDERS_OFFSET, I_UVDERS_OFFSET, IST +INTEGER(KIND=JPIM) :: IOFF_LAT,IOFF_UV,IOFF_UV_EWDER, IOFF_SCALARS, IOFF_SCALARS_EWDER +REAL(KIND=JPRB) :: RET_REAL, RET_COMPLEX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',0,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 0 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + I_UV_OFFSET=IST + + IST = IST+2*KF_UV + I_SC_OFFSET=IST + + IST = IST+KF_SCALARS + !I_NSDERS_OFFSET=IST + + IST = IST+KF_SCDERS + IF(LUVDER) THEN + I_UVDERS_OFFSET=IST + IST = IST+2*KF_UV + ENDIF + + IF(KF_SCDERS > 0) THEN + I_SCDERS_OFFSET=IST + ENDIF +ENDIF + +#ifdef ACCGPU +!$ACC DATA & +!$ACC& PRESENT(D%NPTRLS,D%NSTAGTF,PREEL,G%NMEN, D) +#endif + + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +JLOEN_MAX=MAXVAL(G%NMEN) + +IF (LUVDER) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_UVPREEL) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IGLG,IOFF_LAT,IOFF_UV,IOFF_UV_EWDER,ZIM,RET_REAL,RET_COMPLEX,JM,JF,JGL) & + !$ACC& FIRSTPRIVATE(KF_UV,I_UVDERS_OFFSET,I_UV_OFFSET,KF_FS) +#endif + DO JGL=1,D%NDGL_FS + DO JF=1,2*KF_UV + DO JM=0,JLOEN_MAX/2 + IGLG = D%NPTRLS(MYSETW)+JGL-1 + IOFF_LAT = KF_FS*D%NSTAGTF(JGL) + IOFF_UV = IOFF_LAT+(I_UV_OFFSET+JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL)) + IOFF_UV_EWDER = IOFF_LAT+(I_UVDERS_OFFSET+JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL)) + + RET_REAL = 0.0_JPRBT + RET_COMPLEX = 0.0_JPRBT + + IF (JM <= G%NMEN(IGLG)) THEN + ZIM = REAL(JM,JPRB)*GALD%EXWN + + RET_REAL = & + & -PREEL(IOFF_UV+2*JM+2)*ZIM + RET_COMPLEX = & + & PREEL(IOFF_UV+2*JM+1)*ZIM + ENDIF + PREEL(IOFF_UV_EWDER+2*JM+1) = RET_REAL + PREEL(IOFF_UV_EWDER+2*JM+2) = RET_COMPLEX + ENDDO + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF (KF_SCDERS > 0) THEN +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_SCALARS,PEWDERS,PSCALAR) +#endif +#ifdef ACCGPU + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IGLG,IOFF_LAT,IOFF_SCALARS_EWDER,IOFF_SCALARS,ZIM,RET_REAL,RET_COMPLEX) & + !$ACC& FIRSTPRIVATE(KF_SCALARS,I_SCDERS_OFFSET,I_SC_OFFSET,KF_FS) +#endif + DO JGL=1,D%NDGL_FS + DO JF=1,KF_SCALARS + DO JM=0,JLOEN_MAX/2 + IGLG = D%NPTRLS(MYSETW)+JGL-1 + IOFF_LAT = KF_FS*D%NSTAGTF(JGL) + IOFF_SCALARS_EWDER = IOFF_LAT+(I_SCDERS_OFFSET+JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL)) + IOFF_SCALARS = IOFF_LAT+(I_SC_OFFSET+JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL)) + + RET_REAL = 0.0_JPRBT + RET_COMPLEX = 0.0_JPRBT + + IF (JM <= G%NMEN(IGLG)) THEN + ZIM = REAL(JM,JPRB)*GALD%EXWN + + RET_REAL = & + & -PREEL(IOFF_SCALARS+2*JM+2)*ZIM + RET_COMPLEX = & + & PREEL(IOFF_SCALARS+2*JM+1)*ZIM + ENDIF + ! The rest from G_NMEN(IGLG+1)...MAX is zero truncated + PREEL(IOFF_SCALARS_EWDER+2*JM+1) = RET_REAL + PREEL(IOFF_SCALARS_EWDER+2*JM+2) = RET_COMPLEX + ENDDO + ENDDO + ENDDO +ENDIF + +#ifdef ACCGPU +!$ACC END DATA +#endif + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFSC +END MODULE EFSC_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eftdir_mod.F90 b/src/etrans/gpu/internal/eftdir_mod.F90 new file mode 100644 index 000000000..9a610d243 --- /dev/null +++ b/src/etrans/gpu/internal/eftdir_mod.F90 @@ -0,0 +1,92 @@ +MODULE EFTDIR_MOD +CONTAINS +SUBROUTINE EFTDIR(ALLOCATOR,HFTDIR,PREEL,PREEL_COMPLEX,KF_FS,AUX_PROC) + +USE PARKIND1 ,ONLY : JPIM, JPRB, JPIB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN, ONLY: NCUR_RESOL +USE TPM_DISTR ,ONLY : D +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE TPM_HICFFT ,ONLY : EXECUTE_DIR_FFT +USE FTDIR_MOD, ONLY : FTDIR_HANDLE + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE ISO_C_BINDING +USE BUFFERED_ALLOCATOR_MOD + +! + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +TYPE(FTDIR_HANDLE) :: HFTDIR +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +REAL(KIND=JPRB), INTENT(INOUT), POINTER :: PREEL(:) ! (IRLEN+2)*NDGLG*KF_FS +REAL(KIND=JPRB), INTENT(OUT), POINTER :: PREEL_COMPLEX(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +INTEGER(KIND=JPIM) :: JLOT, IRLEN + +REAL(KIND=JPRB) :: ZDUM +INTEGER(KIND=JPIM) :: INUL + +REAL(KIND=JPRB), POINTER, SAVE :: ZREEL(:), ZREEL_COMPLEX(:) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',0,ZHOOK_HANDLE) + +IRLEN=R%NDLON+R%NNOEXTZG + + +#ifdef IN_PLACE_FFT + PREEL_COMPLEX => PREEL +#else + CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& + & 1_JPIB, 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1))) +#endif + + + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + !!! FIXME !!! CALL EXTPER(PREEL,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,D%NSTAGTF,0) + CALL ABORT('FIXME') +ENDIF +IF (PRESENT(AUX_PROC)) THEN + !!! FIXME !!! CALL AUX_PROC(PREEL,ZDUM,KF_FS,D%NLENGTF,1,D%NDGL_FS,0,.TRUE.,& + !!! FIXME !!! & D%NSTAGTF,INUL,INUL,INUL) + CALL ABORT('FIXME') +ENDIF + + +JLOT=SIZE(PREEL)/(IRLEN+2) + +IF (JLOT==0) THEN + IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + RETURN +ENDIF + +!$ACC DATA PRESENT(PREEL,RALD%NLOENS_LON,RALD%NOFFSETS_LON) + +CALL EXECUTE_DIR_FFT(PREEL(:),PREEL_COMPLEX(:),NCUR_RESOL,JLOT, & + & LOENS=RALD%NLOENS_LON, & + & OFFSETS=RALD%NOFFSETS_LON,ALLOC=ALLOCATOR%PTR) + +!$ACC END DATA + + +IF (LHOOK) CALL DR_HOOK('EFTDIR_MOD:EFTDIR',1,ZHOOK_HANDLE) + +END SUBROUTINE EFTDIR +END MODULE EFTDIR_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eftinv_mod.F90 b/src/etrans/gpu/internal/eftinv_mod.F90 new file mode 100644 index 000000000..553508787 --- /dev/null +++ b/src/etrans/gpu/internal/eftinv_mod.F90 @@ -0,0 +1,66 @@ +MODULE EFTINV_MOD +CONTAINS +SUBROUTINE EFTINV(ALLOCATOR,HFTINV,PREEL,PREEL_REAL,KF_FS) + +USE PARKIND1 ,ONLY : JPIM, JPRB, JPIB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN, ONLY : NCUR_RESOL +USE TPM_DISTR ,ONLY : D +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE TPM_HICFFT ,ONLY : EXECUTE_INV_FFT + +USE FTINV_MOD, ONLY : FTINV_HANDLE +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE ISO_C_BINDING +USE BUFFERED_ALLOCATOR_MOD + +! + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +TYPE(FTINV_HANDLE), INTENT(IN) :: HFTINV +REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PREEL(:) ! (IRLEN+2)*KF_FS*NDGL_FS +REAL(KIND=JPRB), POINTER, INTENT(OUT) :: PREEL_REAL(:) ! (IRLEN+2)*KF_FS*NDGL_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS + +INTEGER(KIND=JPIM) :: JLOT, IRLEN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('EFTINV_MOD:EFTINV',0,ZHOOK_HANDLE) + +IRLEN=R%NDLON+R%NNOEXTZG + +#ifdef IN_PLACE_FFT + PREEL_REAL => PREEL +#else + CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& + & 1_JPIB, 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))) +#endif + +JLOT=SIZE(PREEL)/(IRLEN+2) + +IF (JLOT==0) THEN + IF (LHOOK) CALL DR_HOOK('EFTINV_MOD:EFTINV',1,ZHOOK_HANDLE) + RETURN +ENDIF + +!$ACC DATA PRESENT(PREEL,RALD%NLOENS_LON,RALD%NOFFSETS_LON) +CALL EXECUTE_INV_FFT(PREEL(:),PREEL_REAL(:),NCUR_RESOL,JLOT, & + & LOENS=RALD%NLOENS_LON, & + & OFFSETS=RALD%NOFFSETS_LON,ALLOC=ALLOCATOR%PTR) +!$ACC END DATA + +IF (LHOOK) CALL DR_HOOK('EFTINV_MOD:EFTINV',1,ZHOOK_HANDLE) + +END SUBROUTINE EFTINV +END MODULE EFTINV_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/egath_spec_control_mod.F90 b/src/etrans/gpu/internal/egath_spec_control_mod.F90 new file mode 100644 index 000000000..146b6fc97 --- /dev/null +++ b/src/etrans/gpu/internal/egath_spec_control_mod.F90 @@ -0,0 +1,201 @@ +MODULE EGATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KCPL2M,LDZA0IP) + +!**** *GATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYPROC, NPROC +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SET2PE_MOD ,ONLY : SET2PE + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KCPL2M(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS + +! ------------------------------------------------------------------ + + +CALL ABORT_TRANS('EGATH_SPEC_CONTROL:DEAD CODE') +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=0,KCPL2M(JM)/2-1 + IDIST(II+1) = KDIM0G(JM)+4*JN + IDIST(II+2) = KDIM0G(JM)+4*JN+1 + IDIST(II+3) = KDIM0G(JM)+4*JN+2 + IDIST(II+4) = KDIM0G(JM)+4*JN+3 + II = II+4 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='GATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("GATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('GATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC_CONTROL +END MODULE EGATH_SPEC_CONTROL_MOD + + diff --git a/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 b/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 new file mode 100644 index 000000000..a3c83743d --- /dev/null +++ b/src/etrans/gpu/internal/einv_trans_ctl_mod.F90 @@ -0,0 +1,201 @@ +MODULE EINV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PSPMEANU,PSPMEANV) + +!**** *EINV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, GROWING_ALLOCATION + +! + +USE ELTINV_MOD +USE TRMTOL_PACK_UNPACK, ONLY : TRMTOL_UNPACK, TRMTOL_UNPACK_HANDLE, PREPARE_TRMTOL_UNPACK +USE TRMTOL_MOD +USE EFSC_MOD +USE EFTINV_MOD +USE FTINV_MOD, ONLY : FTINV_HANDLE, PREPARE_FTINV +USE TRLTOG_MOD +USE BUFFERED_ALLOCATOR_MOD + + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +! Local variables + +REAL(KIND=JPRB), POINTER :: PREEL(:), FOUBUF(:), FOUBUF_IN(:), PREEL_REAL(:) + +INTEGER(KIND=JPIM) :: ILEI2, IDIM1 +TYPE(BUFFERED_ALLOCATOR) :: ALLOCATOR +TYPE(FTINV_HANDLE) :: HFTINV +TYPE(ELTINV_HANDLE) :: HELTINV +TYPE(TRMTOL_HANDLE) :: HTRMTOL +TYPE(TRMTOL_UNPACK_HANDLE) :: HTRMTOL_UNPACK +TYPE(TRLTOG_HANDLE) :: HTRLTOG + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',0,ZHOOK_HANDLE) + +IF(NPROMATR > 0) THEN + print *, "This is currently not supported and/or tested (NPROMATR > 0)" + stop 24 +ENDIF + + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT + +ALLOCATOR = MAKE_BUFFERED_ALLOCATOR() +HELTINV = PREPARE_ELTINV(ALLOCATOR,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT) ! ZFFT, FOUBUF_IN +HTRMTOL = PREPARE_TRMTOL(ALLOCATOR,KF_OUT_LT) ! FOUBUF +HTRMTOL_UNPACK = PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) ! HREEL +HFTINV = PREPARE_FTINV(ALLOCATOR,KF_FS) ! HREEL_REAL +HTRLTOG = PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) ! COMBUFR and COMBUFS +CALL INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) + +IF(KF_OUT_LT > 0) THEN + CALL GSTATS(1647,0) + CALL ELTINV(ALLOCATOR, HELTINV, KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,FOUBUF_IN,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & FSPGL_PROC=FSPGL_PROC,PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV) + CALL GSTATS(1647,1) + + CALL GSTATS(152,0) + CALL TRMTOL(ALLOCATOR,HTRMTOL,FOUBUF_IN,FOUBUF,KF_OUT_LT) + CALL TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL,KF_OUT_LT,KF_FS) ! Formerly known as fourier_in routine + CALL GSTATS(152,1) +ENDIF + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSC(PREEL, KF_UV, KF_SCALARS, KF_SCDERS,KF_FS) +ENDIF + +IF ( KF_FS > 0 ) THEN + + CALL EFTINV(ALLOCATOR,HFTINV,PREEL,PREEL_REAL,KF_FS) + +ENDIF + + +CALL TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTL +END MODULE EINV_TRANS_CTL_MOD diff --git a/src/etrans/gpu/internal/eledir_mod.F90 b/src/etrans/gpu/internal/eledir_mod.F90 new file mode 100644 index 000000000..99d14d5d3 --- /dev/null +++ b/src/etrans/gpu/internal/eledir_mod.F90 @@ -0,0 +1,105 @@ +MODULE ELEDIR_MOD +CONTAINS +SUBROUTINE ELEDIR(ALLOCATOR,PFFT,PFFT_OUT) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN, ONLY: NCUR_RESOL +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD + +USE TPM_HICFFT ,ONLY : EXECUTE_DIR_FFT + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE ISO_C_BINDING +USE BUFFERED_ALLOCATOR_MOD + +! + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PFFT(:,:,:) +REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PFFT_OUT(:,:,:) + +INTEGER(KIND=JPIM) :: JLOT +TYPE(C_PTR) :: IPLAN_C2R +REAL (KIND=JPRB), POINTER :: ZFFT_L(:), ZFFT_L_OUT(:) ! 1D copy +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEDIR_MOD:ELEDIR',0,ZHOOK_HANDLE) + +JLOT=UBOUND(PFFT,2)*UBOUND (PFFT,3) + +CALL C_F_POINTER(C_LOC(PFFT), ZFFT_L, (/UBOUND(PFFT,1)*UBOUND(PFFT,2)*UBOUND(PFFT,3)/) ) +CALL C_F_POINTER(C_LOC(PFFT_OUT), ZFFT_L_OUT, (/UBOUND(PFFT_OUT,1)*UBOUND(PFFT_OUT,2)*UBOUND(PFFT_OUT,3)/) ) + +IF (JLOT==0) THEN + IF (LHOOK) CALL DR_HOOK('ELEDIR_MOD:ELEDIR',1,ZHOOK_HANDLE) + RETURN +ENDIF + +!$ACC DATA PRESENT(ZFFT_L,ZFFT_L_OUT,RALD%NLOENS_LAT,RALD%NOFFSETS_LAT) +CALL EXECUTE_DIR_FFT(ZFFT_L(:),ZFFT_L_OUT(:),NCUR_RESOL,-JLOT, & ! -JLOT to have hicfft make distinction between zonal and meridional direction. Don't worry, abs(JLOT) is used internally ... + & LOENS=RALD%NLOENS_LAT, & + & OFFSETS=RALD%NOFFSETS_LAT,ALLOC=ALLOCATOR%PTR) +!$ACC END DATA + +IF (LHOOK) CALL DR_HOOK('ELEDIR_MOD:ELEDIR',1,ZHOOK_HANDLE) + +END SUBROUTINE ELEDIR +END MODULE ELEDIR_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eleinv_mod.F90 b/src/etrans/gpu/internal/eleinv_mod.F90 new file mode 100644 index 000000000..d5fe2d0c2 --- /dev/null +++ b/src/etrans/gpu/internal/eleinv_mod.F90 @@ -0,0 +1,103 @@ +MODULE ELEINV_MOD +CONTAINS +SUBROUTINE ELEINV(ALLOCATOR,PFFT,PFFT_OUT) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB, JPIB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN, ONLY : NCUR_RESOL +USE TPM_DISTR ,ONLY : D +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE TPM_HICFFT ,ONLY : EXECUTE_INV_FFT + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE ISO_C_BINDING +USE BUFFERED_ALLOCATOR_MOD + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +REAL(KIND=JPRB), INTENT(INOUT) :: PFFT(:,:,:), PFFT_OUT(:,:,:) + +INTEGER(KIND=JPIM) :: JLOT +REAL (KIND=JPRB), POINTER :: ZFFT_L(:), ZFFT_L_OUT(:) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',0,ZHOOK_HANDLE) + +JLOT=UBOUND(PFFT,2)*UBOUND (PFFT,3) + +CALL C_F_POINTER(C_LOC(PFFT), ZFFT_L, (/UBOUND(PFFT,1)*UBOUND(PFFT,2)*UBOUND(PFFT,3)/) ) +CALL C_F_POINTER(C_LOC(PFFT_OUT), ZFFT_L_OUT, (/UBOUND(PFFT_OUT,1)*UBOUND(PFFT_OUT,2)*UBOUND(PFFT_OUT,3)/) ) + +IF (JLOT==0) THEN + IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + RETURN +ENDIF + +!$ACC DATA PRESENT(ZFFT_L,ZFFT_L_OUT,RALD%NLOENS_LAT,RALD%NOFFSETS_LAT) +CALL EXECUTE_INV_FFT(ZFFT_L(:),ZFFT_L_OUT(:),NCUR_RESOL,-JLOT, & + & RALD%NLOENS_LAT, & + & RALD%NOFFSETS_LAT,ALLOCATOR%PTR) +!$ACC END DATA + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + +END SUBROUTINE ELEINV +END MODULE ELEINV_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eltdir_mod.F90 b/src/etrans/gpu/internal/eltdir_mod.F90 new file mode 100644 index 000000000..eb51bbaec --- /dev/null +++ b/src/etrans/gpu/internal/eltdir_mod.F90 @@ -0,0 +1,258 @@ +#define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) +MODULE ELTDIR_MOD + USE BUFFERED_ALLOCATOR_MOD + + IMPLICIT NONE + + PRIVATE + PUBLIC :: ELTDIR, ELTDIR_HANDLE, PREPARE_ELTDIR + + TYPE ELTDIR_HANDLE + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFFT + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFFT_OUT + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HVODI + END TYPE + +CONTAINS + FUNCTION PREPARE_ELTDIR(ALLOCATOR,KF_FS,KF_UV) RESULT(HELTDIR) + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE TPM_DISTR, ONLY: D + USE TPM_DIM, ONLY: R + USE TPMALD_DIM ,ONLY : RALD + USE ISO_C_BINDING + !USE LEINV_MOD + + IMPLICIT NONE + + TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR + INTEGER(KIND=JPIM), INTENT(IN) ::KF_FS, KF_UV + + TYPE(ELTDIR_HANDLE) :: HELTDIR + + INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ + REAL(KIND=JPRBT) :: ZPRBT_DUMMY + + ! ZFFT + IALLOC_SZ = ALIGN(1_JPIB*(RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*2*KF_FS*SIZEOF(ZPRBT_DUMMY), 128) + HELTDIR%HFFT = RESERVE(ALLOCATOR, IALLOC_SZ) + + ! ZFFT_OUT +#ifndef IN_PLACE_FFT + IALLOC_SZ = ALIGN(1_JPIB*(RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*2*KF_FS*SIZEOF(ZPRBT_DUMMY), 128) + HELTDIR%HFFT_OUT = RESERVE(ALLOCATOR, IALLOC_SZ) +#endif + + ! ZVODI + IALLOC_SZ = ALIGN(1_JPIB*(RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*MAX(4*KF_UV,1)*SIZEOF(ZPRBT_DUMMY), 128) + HELTDIR%HVODI = RESERVE(ALLOCATOR, IALLOC_SZ) + + END FUNCTION PREPARE_ELTDIR + +SUBROUTINE ELTDIR(ALLOCATOR,HELTDIR,KF_FS,KF_UV,KF_SCALARS,FOUBUF,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + + +USE ISO_C_BINDING + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2B_MOD ,ONLY : EPRFI2B +USE ELEDIR_MOD ,ONLY : ELEDIR +USE EUVTVD_MOD +USE EUVTVD_COMM_MOD +USE EUPDSP_MOD ,ONLY : EUPDSP +USE EXTPER_MOD ,ONLY : EXTPER + +! +!**** *ELTDIR* - Control of Direct Legendre transform step + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIR(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2 - prepares the Fourier work arrays for model variables +! ELEDIR - direct Legendre transform +! EUVTVD - +! EUPDSP - updating of spectral arrays (fields) +! EUVTVD_COMM - +! EXTPER - + + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-03-14 G. Radnoti aladin version +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +TYPE(ELTDIR_HANDLE), INTENT(IN) :: HELTDIR + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: FOUBUF(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IM, JM +INTEGER(KIND=JPIM) :: IUS,IVS,IVORS,IDIVS, IUE, IVE, IVORE, IDIVE + +REAL(KIND=JPRB), POINTER :: ZFFT(:,:,:), ZFFT_L(:), ZFFT_OUT(:,:,:), ZFFT_L_OUT(:) +REAL(KIND=JPRB), POINTER :: ZVODI(:,:,:), ZVODI_L(:) +INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',0,ZHOOK_HANDLE) + +! ZFFT(RALD%NDGLSUR+R%NNOEXTZG,2*KF_FS,D%NUMP) +IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*2*KF_FS*SIZEOF(ZFFT_L(1)), 128) +CALL ASSIGN_PTR(ZFFT_L, GET_ALLOCATION(ALLOCATOR, HELTDIR%HFFT),& + & 1_JPIB, IALLOC_SZ) +CALL C_F_POINTER(C_LOC(ZFFT_L), ZFFT, (/ RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,2*KF_FS /)) + +!* 1. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2B(KF_FS,ZFFT,FOUBUF) + +!* 2. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + +IF(R%NNOEXTZG>0) THEN + CALL ABORT('NNOEXTZG>0 not supported on GPU') +ENDIF + +!* 3. DIRECT LEGENDRE TRANSFORM. +! -------------------------- + +#ifdef IN_PLACE_FFT + ZFFT_OUT => ZFFT +#else + IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*2*KF_FS*SIZEOF(ZFFT_L(1)), 128) + CALL ASSIGN_PTR(ZFFT_L_OUT, GET_ALLOCATION(ALLOCATOR, HELTDIR%HFFT_OUT),& + & 1_JPIB, IALLOC_SZ) + CALL C_F_POINTER(C_LOC(ZFFT_L_OUT), ZFFT_OUT, (/ RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,2*KF_FS /)) +#endif + +CALL ELEDIR(ALLOCATOR,ZFFT,ZFFT_OUT) + +!* 4. COMPUTE VORTICITY AND DIVERGENCE AND STORE MEAN WIND ON TASK OWNING WAVE 0 +! -------------------------------------------------------------------------- + + +! ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1),D%NUMP) +IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*MAX(4*KF_UV,1)*SIZEOF(ZVODI_L(1)), 128) +CALL ASSIGN_PTR(ZVODI_L, GET_ALLOCATION(ALLOCATOR, HELTDIR%HVODI),& + & 1_JPIB, IALLOC_SZ) +CALL C_F_POINTER(C_LOC(ZVODI_L), ZVODI, (/ RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,MAX(4*KF_UV,1) /)) + + +#ifdef ACCGPU + !$ACC DATA COPYOUT(PSPVOR,PSPDIV) IF(KF_UV > 0) + !$ACC DATA COPYOUT(PSPSCALAR) IF(PRESENT(PSPSCALAR) .AND. KF_SCALARS > 0) + !$ACC DATA COPYOUT(PSPSC2) IF(PRESENT(PSPSC2)) + !$ACC DATA COPYOUT(PSPSC3A) IF((PRESENT(PSPSC3A))) + !$ACC DATA COPYOUT(PSPSC3B) IF((PRESENT(PSPSC3B))) +#endif + +IF( KF_UV > 0 ) THEN + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUVTVD(KF_UV,ZFFT_OUT(:,:,IUS:IUE),ZFFT_OUT(:,:,IVS:IVE),& + & ZVODI(:,:,IVORS:IVORE),ZVODI(:,:,IDIVS:IDIVE)) + + DO JM=1,D%NUMP + IM = D%MYMS(JM) + + CALL EUVTVD_COMM(IM,JM,KF_UV,KFLDPTRUV,ZFFT_OUT(:,:,IUS:IUE), & + & ZFFT_OUT(:,:,IVS:IVE), & + & PSPMEANU,PSPMEANV) + + ENDDO + +ENDIF + +!* 5. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSP(KF_UV,KF_SCALARS,ZFFT_OUT,ZVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,KFLDPTRUV,KFLDPTRSC) + +#ifdef ACCGPU + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR +END MODULE ELTDIR_MOD diff --git a/src/etrans/gpu/internal/eltinv_mod.F90 b/src/etrans/gpu/internal/eltinv_mod.F90 new file mode 100644 index 000000000..9b02dda4c --- /dev/null +++ b/src/etrans/gpu/internal/eltinv_mod.F90 @@ -0,0 +1,321 @@ +#define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) +MODULE ELTINV_MOD + USE BUFFERED_ALLOCATOR_MOD + + IMPLICIT NONE + + PRIVATE + PUBLIC :: ELTINV, ELTINV_HANDLE, PREPARE_ELTINV + + TYPE ELTINV_HANDLE + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFFT + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFFT_OUT + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFOUBUF_IN + !TYPE(ALLOCATION_RESERVATION_HANDLE) :: HFOUBUF + END TYPE + +CONTAINS + FUNCTION PREPARE_ELTINV(ALLOCATOR,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT) RESULT(HELTINV) + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE TPM_DISTR, ONLY: D + USE TPM_DIM, ONLY: R + USE TPMALD_DIM ,ONLY : RALD + USE ISO_C_BINDING + + IMPLICIT NONE + + TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR + INTEGER(KIND=JPIM), INTENT(IN) ::KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT + + TYPE(ELTINV_HANDLE) :: HELTINV + + INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ + REAL(KIND=JPRBT) :: ZPRBT_DUMMY + + ! ZFFT + IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*(8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS)*SIZEOF(ZPRBT_DUMMY), 128) + HELTINV%HFFT = RESERVE(ALLOCATOR, IALLOC_SZ) + +#ifndef IN_PLACE_FFT + ! ZFFT + IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*(8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS)*SIZEOF(ZPRBT_DUMMY), 128) + HELTINV%HFFT_OUT = RESERVE(ALLOCATOR, IALLOC_SZ) +#endif + + ! FOUBUF_IN + IALLOC_SZ = D%NLENGT1B*2*KF_OUT_LT*SIZEOF(ZPRBT_DUMMY) + HELTINV%HFOUBUF_IN = RESERVE(ALLOCATOR, IALLOC_SZ) + + END FUNCTION PREPARE_ELTINV + +SUBROUTINE ELTINV(ALLOCATOR,HELTINV,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,FOUBUF_IN,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +USE ISO_C_BINDING + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPMALD_DIM ,ONLY : RALD +USE EPRFI1B_MOD ,ONLY : EPRFI1B +USE EVDTUV_MOD ,ONLY : EVDTUV +USE ESPNSDE_MOD ,ONLY : ESPNSDE +USE ELEINV_MOD ,ONLY : ELEINV +USE EASRE1B_MOD ,ONLY : EASRE1B +!!! FIXME !!! USE FSPGL_INT_MOD ,ONLY : FSPGL_INT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!**** *LTINV* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence +! SPNSDE - compute north-south derivatives +! LEINV - Inverse Legendre transform +! ASRE1 - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 26-Aug-2021 Optimization for EASRE1B +! ------------------------------------------------------------------ + +IMPLICIT NONE + +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR +TYPE(ELTINV_HANDLE), INTENT(IN) :: HELTINV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 +REAL(KIND=JPRB), INTENT(OUT), POINTER :: FOUBUF_IN(:) + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB), POINTER :: ZFFT_L(:), ZFFT(:,:,:), ZFFT_L_OUT(:), ZFFT_OUT(:,:,:) +INTEGER(KIND=JPIM) :: IFC, ISTA +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: IFIRST, ILAST,IDIM1,IDIM3,J3 +INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',0,ZHOOK_HANDLE) + +! ZFFT +IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*KLEI2*SIZEOF(ZFFT_L(1)), 128) +CALL ASSIGN_PTR(ZFFT_L, GET_ALLOCATION(ALLOCATOR, HELTINV%HFFT),& + & 1_JPIB, IALLOC_SZ) +CALL C_F_POINTER(C_LOC(ZFFT_L), ZFFT, (/ RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,KLEI2 /)) + +#ifdef OMPGPU +#endif +#ifdef ACCGPU + !$ACC DATA COPYIN(PSPVOR,PSPDIV) IF(KF_UV > 0) + !$ACC DATA COPYIN(PSPSCALAR) IF(PRESENT(PSPSCALAR) .AND. KF_SCALARS > 0) + !$ACC DATA COPYIN(PSPSC2) IF(PRESENT(PSPSC2)) + !$ACC DATA COPYIN(PSPSC3A) IF(PRESENT(PSPSC3A)) + !$ACC DATA COPYIN(PSPSC3B) IF(PRESENT(PSPSC3B)) + !$ACC DATA COPYIN(PSPMEANU) IF(KF_UV > 0) + !$ACC DATA COPYIN(PSPMEANV) IF(KF_UV > 0) +#endif + +IFIRST = 1 +ILAST = 4*KF_UV + +! TODO: this zero-initialization is needed, but could be moved more efficiently inside EPRFI1B/EVDTUV/ESPNSDE +!$acc kernels present (ZFFT) +ZFFT = 0.0_JPRB +!$acc end kernels + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL EPRFI1B(ZFFT(:,:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(ZFFT(:,:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + + ILAST = ILAST+4*KF_UV + + CALL EVDTUV(KF_UV,KFLDPTRUV,ZFFT(:,:,IVORL:IVORU),ZFFT(:,:,IDIVL:IDIVU),& + & ZFFT(:,:,IUL:IUU),ZFFT(:,:,IVL:IVU),PSPMEANU,PSPMEANV) + +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(ZFFT(:,:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL ESPNSDE(KF_SCALARS,ZFFT(:,:,ISL:ISU),ZFFT(:,:,IDL:IDU)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +! ZFFT_OUT +#ifdef IN_PLACE_FFT + ZFFT_OUT=>ZFFT +#else + IALLOC_SZ = ALIGN((RALD%NDGLSUR+R%NNOEXTZG)*D%NUMP*KLEI2*SIZEOF(ZFFT_L_OUT(1)), 128) + CALL ASSIGN_PTR(ZFFT_L_OUT, GET_ALLOCATION(ALLOCATOR, HELTINV%HFFT_OUT),& + & 1_JPIB, IALLOC_SZ) + CALL C_F_POINTER(C_LOC(ZFFT_L_OUT), ZFFT_OUT, (/ RALD%NDGLSUR+R%NNOEXTZG,D%NUMP,KLEI2 /)) +#endif + +CALL ELEINV(ALLOCATOR,ZFFT,ZFFT_OUT) + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + + +! FOUBUF_IN +IALLOC_SZ = D%NLENGT1B*2*KF_OUT_LT*SIZEOF(FOUBUF_IN(1)) +CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HELTINV%HFOUBUF_IN),& + & 1_JPIB, IALLOC_SZ) + +CALL EASRE1B(KF_OUT_LT,ZFFT_OUT(:,:,ISTA:ISTA+IFC-1),FOUBUF_IN) + +! ------------------------------------------------------------------ + +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +IF(PRESENT(FSPGL_PROC)) THEN +!!! FIXME !!! CALL FSPGL_INT(KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& +!!! FIXME !!! & KFLDPTRUV,KFLDPTRSC) +CALL ABORT('FIXME') +ENDIF + +#ifdef ACCGPU + !$ACC WAIT(1) + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA + !$ACC END DATA +#endif + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV +END MODULE ELTINV_MOD + diff --git a/src/etrans/gpu/internal/eprfi1b_mod.F90 b/src/etrans/gpu/internal/eprfi1b_mod.F90 new file mode 100644 index 000000000..397905995 --- /dev/null +++ b/src/etrans/gpu/internal/eprfi1b_mod.F90 @@ -0,0 +1,126 @@ +MODULE EPRFI1B_MOD +CONTAINS +SUBROUTINE EPRFI1B(PFFT,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +USE TPM_DISTR, ONLY : D +USE TPMALD_DISTR ,ONLY : DALD +! +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1B(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(OUT) :: PFFT(:,:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF +INTEGER(KIND=JPIM) :: IM, JM, MAX_NCPL2M +INTEGER(KIND=JPIM) :: JFLDPTR(KFIELDS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',0,ZHOOK_HANDLE) + +IF (PRESENT(KFLDPTR)) THEN + JFLDPTR=KFLDPTR +ELSE + DO JFLD=1,KFIELDS + JFLDPTR(JFLD)=JFLD + ENDDO +ENDIF + +!$acc data present (PFFT, PSPEC) + +!!$acc kernels default(none) +!PFFT = 0._JPRB +!!$acc end kernels + +MAX_NCPL2M = MAXVAL (DALD%NCPL2M) + +!$ACC parallel loop collapse(3) & +!$ACC& present(D,DALD,D%MYMS,DALD%NCPL2M,DALD%NESM0,D%NUMP) & +!$ACC& copyin(KFIELDS,MAX_NCPL2M,JFLDPTR) & +!$ACC& private(IR,II,IM,ILCM,IOFF,INM,JFLD) default(none) +DO JM = 1, D%NUMP + DO JFLD=1,KFIELDS + DO J=1,MAX_NCPL2M,2 + IR = 2*JFLD-1 + II = IR+1 + IM = D%MYMS(JM) + ILCM = DALD%NCPL2M(IM) + IF (J .LE. ILCM) THEN + IOFF = DALD%NESM0(IM) + INM = IOFF+(J-1)*2 + PFFT(J ,JM,IR) = PSPEC(JFLDPTR(JFLD),INM ) + PFFT(J+1,JM,IR) = PSPEC(JFLDPTR(JFLD),INM+1) + PFFT(J ,JM,II) = PSPEC(JFLDPTR(JFLD),INM+2) + PFFT(J+1,JM,II) = PSPEC(JFLDPTR(JFLD),INM+3) + ELSE + PFFT(J ,JM,IR) = 0._JPRB + PFFT(J+1,JM,IR) = 0._JPRB + PFFT(J ,JM,II) = 0._JPRB + PFFT(J+1,JM,II) = 0._JPRB + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end data + + + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1B +END MODULE EPRFI1B_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eprfi2b_mod.F90 b/src/etrans/gpu/internal/eprfi2b_mod.F90 new file mode 100644 index 000000000..8dd94c51d --- /dev/null +++ b/src/etrans/gpu/internal/eprfi2b_mod.F90 @@ -0,0 +1,108 @@ +MODULE EPRFI2B_MOD +CONTAINS +SUBROUTINE EPRFI2B(KFIELD,PFFT,FOUBUF) + +!**** *EPRFI2B* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND_ECTRANS, ONLY : JPRBT +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRBT) , INTENT(OUT) :: PFFT(:,:,:) +REAL(KIND=JPRBT) , INTENT(IN) :: FOUBUF(:) + +INTEGER(KIND=JPIM) :: IM, JM +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL +INTEGER(KIND=JPIM) :: IJR, IJI +REAL(KIND=JPRB) :: SCAL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',0,ZHOOK_HANDLE) + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +SCAL=1._JPRB/REAL(R%NDGL,JPRB) + +!$acc data & +!$acc& present(PFFT) & +!$acc& present(FOUBUF) & +!$acc& copyin(R%NDGL,D%NPNTGTB1,D%NPROCL,D%NUMP,D%MYMS,SCAL) + +!loop over wavenumber +!$acc parallel loop collapse(3) private(ISTAN,IM,IJR,IJI,JM) +DO JM = 1, D%NUMP + DO JF =1,KFIELD + DO JGL=1,R%NDGL + IM = D%MYMS(JM) + IJR = 2*(JF-1)+1 + IJI = IJR+1 + ISTAN = (D%NPNTGTB1(JM,JGL))*2*KFIELD + PFFT(JGL,JM,IJR) = SCAL*FOUBUF(ISTAN+IJR) + PFFT(JGL,JM,IJI) = SCAL*FOUBUF(ISTAN+IJI) + ENDDO + ENDDO +ENDDO + +!$acc end data + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2B +END MODULE EPRFI2B_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eset_resol_mod.F90 b/src/etrans/gpu/internal/eset_resol_mod.F90 new file mode 100644 index 000000000..80e3f3ede --- /dev/null +++ b/src/etrans/gpu/internal/eset_resol_mod.F90 @@ -0,0 +1,64 @@ +MODULE ESET_RESOL_MOD +CONTAINS +SUBROUTINE ESET_RESOL(KRESOL) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +USE TPM_CTL, ONLY : C, CTL_RESOL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',0,ZHOOK_HANDLE) +IF(MSETUP0 == 0) CALL ABORT_TRANS('ESET_RESOL:TRANS NOT SETUP') +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(KRESOL < 1 .OR. KRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'ESET_RESOL: UNKNOWN RESOLUTION ',KRESOL,NMAX_RESOL + CALL ABORT_TRANS('ESET_RESOL:KRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + + + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) + C => CTL_RESOL(NCUR_RESOL) + + RALD => ALDDIM_RESOL(NCUR_RESOL) + DALD => ALDDISTR_RESOL(NCUR_RESOL) + FALD => ALDFIELDS_RESOL(NCUR_RESOL) + GALD => ALDGEO_RESOL(NCUR_RESOL) + +ENDIF +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',1,ZHOOK_HANDLE) + +END SUBROUTINE ESET_RESOL +END MODULE ESET_RESOL_MOD diff --git a/src/etrans/gpu/internal/espnorm_ctl_mod.F90 b/src/etrans/gpu/internal/espnorm_ctl_mod.F90 new file mode 100644 index 000000000..6e0ad3aae --- /dev/null +++ b/src/etrans/gpu/internal/espnorm_ctl_mod.F90 @@ -0,0 +1,64 @@ +MODULE ESPNORM_CTL_MOD +CONTAINS +SUBROUTINE ESPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYSETV, MYPROC + +USE ESPNORMD_MOD ,ONLY : ESPNORMD +USE SPNORMC_MOD ,ONLY : SPNORMC + +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) + +REAL(KIND=JPRB) :: ZMET(0:R%NSPEC_G) + +REAL(KIND=JPRB) :: ZSM(KFLD,D%NUMP) + +REAL(KIND=JPRB) :: ZGM(KFLD_G,0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE1 + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',0,ZHOOK_HANDLE) +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRB +ENDIF + +CALL ESPNORMD(PSPEC,KFLD,ZMET,ZSM) + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',0,ZHOOK_HANDLE1) +CALL SPNORMC(ZSM,KFLD_G,IVSET,KMASTER,RALD%NMSMAX,ZGM) +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',1,ZHOOK_HANDLE1) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = SUM(ZGM,DIM=2) + PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORM_CTL +END MODULE ESPNORM_CTL_MOD diff --git a/src/etrans/gpu/internal/espnormd_mod.F90 b/src/etrans/gpu/internal/espnormd_mod.F90 new file mode 100644 index 000000000..75e245add --- /dev/null +++ b/src/etrans/gpu/internal/espnormd_mod.F90 @@ -0,0 +1,55 @@ +MODULE ESPNORMD_MOD +CONTAINS +SUBROUTINE ESPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PMET(0:R%NSPEC_G) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(OUT) :: PSM(:,:) +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP +INTEGER(KIND=JPIM) :: IN,ISPE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',0,ZHOOK_HANDLE) + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD,IN,ISPE) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRB + IM = D%MYMS(JM) + + IN=DALD%NCPL2M(IM)/2 - 1 + DO JN=0,IN + ISP=DALD%NESM0(IM) + (JN)*4 + ISPE=DALD%NPME (IM) + JN + DO JFLD=1,KFLD + PSM(JFLD,JM) =PSM(JFLD,JM)& + & + PMET(ISPE) *& + & ( PSPEC(JFLD,ISP )**2 + PSPEC(JFLD,ISP+1)**2 +& + & PSPEC(JFLD,ISP+2)**2 + PSPEC(JFLD,ISP+3)**2 ) + + ENDDO + ENDDO + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORMD +END MODULE ESPNORMD_MOD + diff --git a/src/etrans/gpu/internal/espnsde_mod.F90 b/src/etrans/gpu/internal/espnsde_mod.F90 new file mode 100644 index 000000000..e32977c56 --- /dev/null +++ b/src/etrans/gpu/internal/espnsde_mod.F90 @@ -0,0 +1,109 @@ +MODULE ESPNSDE_MOD +CONTAINS +SUBROUTINE ESPNSDE(KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:,:) + +INTEGER(KIND=JPIM) :: J, JN,IN, JM, IM, JNMAX +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',0,ZHOOK_HANDLE) + +JNMAX = MAXVAL (DALD%NCPL2M) + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, IN, ZIN) & +!$acc & present (D,DALD,D%NUMP, D%MYMS, DALD%NCPL2M, PNSD, PF) +DO J=1,2*KF_SCALARS + DO JM = 1, D%NUMP + DO JN=1,JNMAX,2 + IM = D%MYMS(JM) + IF (JN <= DALD%NCPL2M(IM)) THEN + IN =(JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PNSD(JN ,JM,J) = -ZIN*PF(JN+1,JM,J) + PNSD(JN+1,JM,J) = ZIN*PF(JN ,JM,J) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDE +END MODULE ESPNSDE_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eupdsp_mod.F90 b/src/etrans/gpu/internal/eupdsp_mod.F90 new file mode 100644 index 000000000..0a943e213 --- /dev/null +++ b/src/etrans/gpu/internal/eupdsp_mod.F90 @@ -0,0 +1,142 @@ +MODULE EUPDSP_MOD +CONTAINS +SUBROUTINE EUPDSP(KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSP(...) + +! Explicit arguments : +! -------------------- +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPB_MOD ,ONLY : EUPDSPB +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV,KF_SCALARS +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PVODI(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',0,ZHOOK_HANDLE) + +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPB(KF_UV,PVODI(:,:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPB(KF_UV,PVODI(:,:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPB(KF_SCALARS,PFFT(:,:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFT(:,:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFT(:,:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(IDIM1,PFFT(:,:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSP +END MODULE EUPDSP_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/eupdspb_mod.F90 b/src/etrans/gpu/internal/eupdspb_mod.F90 new file mode 100644 index 000000000..d59974edf --- /dev/null +++ b/src/etrans/gpu/internal/eupdspb_mod.F90 @@ -0,0 +1,113 @@ +MODULE EUPDSPB_MOD +CONTAINS +SUBROUTINE EUPDSPB(KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPB* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPB(....) + +! Explicit arguments : +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD +REAL(KIND=JPRB) ,INTENT(IN) :: POA(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD, JM, IM +INTEGER(KIND=JPIM) :: JFLDPTR(KFIELD) +INTEGER(KINd=JPIM) :: MAX_NCPL2M +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + +! ------------------------------------------------------------------ + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',0,ZHOOK_HANDLE) + +!$ACC data present (POA, PSPEC) + +IF(PRESENT(KFLDPTR)) THEN + JFLDPTR=KFLDPTR +ELSE + DO JFLD=1,KFIELD + JFLDPTR(JFLD)=JFLD + ENDDO +ENDIF + +MAX_NCPL2M = MAXVAL (DALD%NCPL2M) + +!$ACC parallel loop collapse(3) & +!$acc& copyin(MAX_NCPL2M,KFIELD,JFLDPTR) & +!$acc& present(D%NUMP,D%MYMS,DALD%NESM0,DALD%NCPL2M) & +!$acc& private(JM,JN,JFLD,IM,INM,IR,II ) +DO JN=1,MAX_NCPL2M,2 + DO JM = 1, D%NUMP + DO JFLD=1,KFIELD + IM = D%MYMS(JM) + INM=DALD%NESM0(IM)+(JN-1)*2 + if ( JN .LE. DALD%NCPL2M(IM) ) then + IR= 2*JFLD-1 + II=IR+1 + PSPEC(JFLDPTR(JFLD),INM) =POA(JN ,JM,IR) + PSPEC(JFLDPTR(JFLD),INM+1) =POA(JN+1,JM,IR) + PSPEC(JFLDPTR(JFLD),INM+2) =POA(JN ,JM,II) + PSPEC(JFLDPTR(JFLD),INM+3) =POA(JN+1,JM,II) + endif + ENDDO + ENDDO + + ENDDO + +!$ACC end data + +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',1,ZHOOK_HANDLE) + +END SUBROUTINE EUPDSPB +END MODULE EUPDSPB_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/euvtvd_comm_mod.F90 b/src/etrans/gpu/internal/euvtvd_comm_mod.F90 new file mode 100644 index 000000000..27da26a39 --- /dev/null +++ b/src/etrans/gpu/internal/euvtvd_comm_mod.F90 @@ -0,0 +1,172 @@ +MODULE EUVTVD_COMM_MOD +CONTAINS +SUBROUTINE EUVTVD_COMM(KM,KMLOC,KFIELD,KFLDPTR,PU,PV,PSPMEANU,PSPMEANV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - communication part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR +USE TPMALD_GEO +USE TPMALD_DISTR +USE MPL_MODULE +USE SET2PE_MOD +USE ABORT_TRANS_MOD +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD, KM, KMLOC +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IR, J + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +INTEGER(KIND=JPIM) :: JA,ITAG,ILEN,IFLD,ISND +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +IF (KM == 0) THEN + +!$acc data present(PU,PV) +!$acc data copyout (PSPMEANU, PSPMEANV) copyin(KMLOC) +!$acc data copyin (KFLDPTR) if(present (KFLDPTR)) + + IF (PRESENT(KFLDPTR)) THEN +!$acc parallel loop private(ir,ifld) + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=PU(1,KMLOC,IR) + PSPMEANV(IFLD)=PV(1,KMLOC,IR) + ENDDO +!$acc end parallel loop + ELSE +!$acc parallel loop private(j,ir) + DO J = 1, KFIELD + IR = 2*J-1 + PSPMEANU(J)=PU(1,KMLOC,IR) + PSPMEANV(J)=PV(1,KMLOC,IR) + ENDDO +!$acc end parallel loop + ENDIF + +!$acc end data +!$acc end data +!$acc end data +ENDIF + +IF (NPRTRW > 1 .AND. KFIELD > 0) THEN + IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+ISND + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO + ELSE + IF (KMLOC == 1) THEN + IF (D%NPROCM(0) /= MYSETW) THEN + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+MYPROC + + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=ISND,KTAG=ITAG,KOUNT=ILEN, & + & CDSTRING='EUVTVD_COMM:') + IF (ILEN /= 2*KFIELD) CALL ABORT_TRANS('EUVTVD_COMM: RECV INVALID RECEIVE MESSAGE LENGHT') + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF + ENDIF + ENDIF + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD_COMM +END MODULE EUVTVD_COMM_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/euvtvd_mod.F90 b/src/etrans/gpu/internal/euvtvd_mod.F90 new file mode 100644 index 000000000..56c389219 --- /dev/null +++ b/src/etrans/gpu/internal/euvtvd_mod.F90 @@ -0,0 +1,131 @@ +MODULE EUVTVD_MOD +CONTAINS +SUBROUTINE EUVTVD(KFIELD,PU,PV,PVOR,PDIV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - calculation part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PVOR(:,:,:),PDIV(:,:,:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN +INTEGER(KIND=JPIM) :: IM, JM, JNMAX + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + + +!$acc parallel loop collapse(3) private(J,JM,JN,IR,II,IM,ZKM) present (PVOR, PDIV, PU, PV,D%MYMS,D%NUMP) +DO J=1,KFIELD + DO JM=1,D%NUMP + DO JN=1,R%NDGL+R%NNOEXTZG + IM = D%MYMS(JM) + ZKM=REAL(IM,JPRB)*GALD%EXWN + IR=2*J-1 + II=IR+1 + PDIV(JN,JM,IR)=-ZKM*PU(JN,JM,II) + PDIV(JN,JM,II)= ZKM*PU(JN,JM,IR) + PVOR(JN,JM,IR)=-ZKM*PV(JN,JM,II) + PVOR(JN,JM,II)= ZKM*PV(JN,JM,IR) + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +JNMAX = MAXVAL(DALD%NCPL2M) + +!$acc parallel loop collapse(3) private(J,JM,JN,IM,ZIN,IN) copyin (JNMAX) present (PVOR, PDIV, PU, PV,DALD%NCPL2M,D%NUMP,D%MYMS) +DO J=1,2*KFIELD + DO JM=1,D%NUMP + DO JN=1,JNMAX,2 + IM = D%MYMS(JM) + IF ( JN <= DALD%NCPL2M(IM) ) THEN ! should be here, but doesn't work !? + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PVOR(JN ,JM,J)=PVOR(JN ,JM,J)+ZIN*PU(JN+1,JM,J) + PVOR(JN+1,JM,J)=PVOR(JN+1,JM,J)-ZIN*PU(JN ,JM,J) + PDIV(JN ,JM,J)=PDIV(JN ,JM,J)-ZIN*PV(JN+1,JM,J) + PDIV(JN+1,JM,J)=PDIV(JN+1,JM,J)+ZIN*PV(JN ,JM,J) + ELSE + PVOR(JN ,JM,J)=0._JPRB + PVOR(JN+1,JM,J)=0._JPRB + PDIV(JN ,JM,J)=0._JPRB + PDIV(JN+1,JM,J)=0._JPRB + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD +END MODULE EUVTVD_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/evdtuv_mod.F90 b/src/etrans/gpu/internal/evdtuv_mod.F90 new file mode 100644 index 000000000..7cd81e1e8 --- /dev/null +++ b/src/etrans/gpu/internal/evdtuv_mod.F90 @@ -0,0 +1,166 @@ +MODULE EVDTUV_MOD +CONTAINS +SUBROUTINE EVDTUV(KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_DISTR ,ONLY : D +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:,:),PDIV(:,:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:,:),PV (:,:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(IN) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IN, IFLD +INTEGER(KIND=JPIM) :: JM, IM +INTEGER(KIND=JPIM) :: JNMAX + +REAL(KIND=JPRB) :: ZLEPINM +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',0,ZHOOK_HANDLE) + +JNMAX = MAXVAL (DALD%NCPL2M) + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, IN, ZIN) & +!$acc & present (D%NUMP, D%MYMS, DALD%NCPL2M, PU, PV, PVOR, PDIV) +DO J=1,2*KFIELD + DO JM = 1, D%NUMP + DO JN=1,JNMAX,2 + IM = D%MYMS (JM) + IF (JN <= DALD%NCPL2M(IM)) THEN + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PU(JN ,JM,J) = -ZIN*PVOR(JN+1,JM,J) + PU(JN+1,JM,J) = ZIN*PVOR(JN ,JM,J) + PV(JN ,JM,J) = -ZIN*PDIV(JN+1,JM,J) + PV(JN+1,JM,J) = ZIN*PDIV(JN ,JM,J) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +!$acc parallel loop collapse (3) private (JM, J, JN, IM, ZKM, IR, II, IJ, ZLEPINM) & +!$acc & present (D%NUMP, D%MYMS, DALD%NCPL2M, FALD%RLEPINM, PU, PV, PDIV, PVOR) +DO J=1,KFIELD + DO JM = 1, D%NUMP + DO JN=1,JNMAX + IM = D%MYMS (JM) + ZKM=REAL(IM,JPRB)*GALD%EXWN + IR = 2*J-1 + II = IR+1 + IF (JN <= DALD%NCPL2M(IM)) THEN + IJ=(JN-1)/2 + ZLEPINM = FALD%RLEPINM(DALD%NPME(IM)+IJ) + PU(JN,JM,IR)= ZLEPINM*(-ZKM*PDIV(JN,JM,II)-PU(JN,JM,IR)) + PU(JN,JM,II)= ZLEPINM*( ZKM*PDIV(JN,JM,IR)-PU(JN,JM,II)) + PV(JN,JM,IR)= ZLEPINM*(-ZKM*PVOR(JN,JM,II)+PV(JN,JM,IR)) + PV(JN,JM,II)= ZLEPINM*( ZKM*PVOR(JN,JM,IR)+PV(JN,JM,II)) + ENDIF + ENDDO + ENDDO +ENDDO +!$acc end parallel loop + +IF (PRESENT(KFLDPTR)) THEN +!$acc parallel loop collapse (2) private (J, JM, IM, IR, IFLD) & +!$acc & present (D%NUMP, D%MYMS, PU, PV, PSPMEANU, PSPMEANV) copyin (KFLDPTR) + DO J = 1, KFIELD + DO JM = 1, D%NUMP + IM = D%MYMS (JM) + IF (IM == 0) THEN + IR = 2*J-1 + IFLD=KFLDPTR(J) + PU(1,JM,IR)=PSPMEANU(IFLD) + PV(1,JM,IR)=PSPMEANV(IFLD) + ENDIF + ENDDO + ENDDO +!$acc end parallel loop +ELSE +!$acc parallel loop collapse (2) private (J, JM, IM, IR) & +!$acc & present (D%NUMP, D%MYMS, PU, PV, PSPMEANU, PSPMEANV) + DO J = 1, KFIELD + DO JM = 1, D%NUMP + IM = D%MYMS (JM) + IF (IM == 0) THEN + IR = 2*J-1 + PU(1,JM,IR)=PSPMEANU(J) + PV(1,JM,IR)=PSPMEANV(J) + ENDIF + ENDDO + ENDDO +!$acc end parallel loop +ENDIF + +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',1,ZHOOK_HANDLE) + +END SUBROUTINE EVDTUV +END MODULE EVDTUV_MOD \ No newline at end of file diff --git a/src/etrans/gpu/internal/suemp_trans_mod.F90 b/src/etrans/gpu/internal/suemp_trans_mod.F90 new file mode 100644 index 000000000..e93252e9b --- /dev/null +++ b/src/etrans/gpu/internal/suemp_trans_mod.F90 @@ -0,0 +1,258 @@ +MODULE SUEMP_TRANS_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS + +! Set up distributed environment for the transform package (part 2) +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, NPRTRNS, NPRTRW, MYSETW, NPROC, MYPROC +USE TPMALD_DIM ,ONLY : RALD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUEMPLAT_MOD ,ONLY : SUEMPLAT +USE SUESTAONL_MOD ,ONLY : SUESTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRD) :: ZMEDIAP + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPRD),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',0,ZHOOK_HANDLE) +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + +ALLOCATE(D%NULTPP(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) +ALLOCATE(D%NPTRLS(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) +ALLOCATE(D%NPROCL(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + +CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) +D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition +ALLOCATE(D%NLTSGTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) +ALLOCATE(D%NLTSFTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) +ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) +ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) +ALLOCATE(D%MSTABF (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + +D%NLTSGTB(:) = 0 +DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO +ENDDO +DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT +ENDDO + +DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET +ENDDO +D%MSTABF(MYSETW) = MYSETW + +ALLOCATE(D%NPNTGTB0(0:RALD%NMSMAX,D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) +ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + + +! Global offsets of processors +D%NSTAGT0B(1) = 0 +D%NSTAGT1B(1) = 0 +DO JA=2,NPRTRNS + D%NSTAGT0B(JA) = D%NSTAGT0B(JA-1)+D%NLTSGTB(JA-1) + D%NSTAGT1B(JA) = D%NSTAGT1B(JA-1)+D%NLTSFTB(JA-1) +ENDDO + +! Global size of foubuf +D%NLENGT0B = D%NSTAGT0B(NPRTRNS)+D%NLTSGTB(NPRTRNS) +D%NLENGT1B = D%NSTAGT1B(NPRTRNS)+D%NLTSFTB(NPRTRNS) + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = D%NSTAGT0B(D%NPROCM(IM))+IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = D%NSTAGT1B(D%NPROCL(IGL))+IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ELSE + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUEMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN +ALLOCATE(D%NSTAGTF(D%NDGL_FS+1)) ! NDGL_FS+1 needed in trmtol_unpack +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) +IOFF = 0 +DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+2+R%NNOEXTZL +ENDDO +D%NSTAGTF(D%NDGL_FS+1) = IOFF +D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS +END MODULE SUEMP_TRANS_MOD + diff --git a/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 b/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 new file mode 100644 index 000000000..6112ef2e1 --- /dev/null +++ b/src/etrans/gpu/internal/suemp_trans_preleg_mod.F90 @@ -0,0 +1,240 @@ +MODULE SUEMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD + +!USE SUWAVEDI_MOD +!USE ABORT_TRANS_MOD + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JA,JM,JMLOC,JW,JV,ILATPP,IRESTL,IMLOC,IDT,INM,JN,IM,ILAST + +LOGICAL :: LLP1,LLP2 + +INTEGER(KIND=JPIM) :: ISPEC(NPRTRW),IMYMS(RALD%NMSMAX+1),IKNTMP(0:RALD%NMSMAX) +INTEGER(KIND=JPIM) :: IKMTMP(0:R%NSMAX),ISPEC2P +INTEGER(KIND=JPIM) :: IC(NPRTRW) +INTEGER(KIND=JPIM) :: IMDIM,IL,IND,IK,IPOS,IKM +REAL(KIND=JPRB) :: ZLEPDIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',0,ZHOOK_HANDLE) + +IF(.NOT.D%LGRIDONLY) THEN + +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS_PRELEG ===' + +!* 1. Initialize partitioning of wave numbers to PEs ! +! ---------------------------------------------- + +ALLOCATE(D%NASM0(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + +ALLOCATE(DALD%NESM0(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'DALD%NESM0 ',SIZE(DALD%NESM0 ),SHAPE(DALD%NESM0 ) + +ALLOCATE(D%NATM0(0:R%NTMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) +ALLOCATE(D%NUMPP(NPRTRW)) +IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) +ALLOCATE(D%NPOSSP(NPRTRW+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + +ALLOCATE(D%NPROCM(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + +ALLOCATE(DALD%NPME(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'DALD%NPME',SIZE(DALD%NPME),SHAPE(DALD%NPME) +ALLOCATE(DALD%NCPL2M(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'DALD%NCPL2M',SIZE(DALD%NCPL2M),SHAPE(DALD%NCPL2M) +CALL ELLIPS(R%NSMAX,RALD%NMSMAX,IKNTMP,IKMTMP) +DALD%NPME(0)=1 +DO JM=1,RALD%NMSMAX + DALD%NPME(JM)=DALD%NPME(JM-1)+IKNTMP(JM-1)+1 +ENDDO +DO JM=0,RALD%NMSMAX + DALD%NCPL2M(JM) = 2*(IKNTMP(JM)+1) +ENDDO +ALLOCATE(FALD%RLEPINM(R%NSPEC_G/2)) +IF(LLP2)WRITE(NOUT,9) 'FALD%RLEPINM',SIZE(FALD%RLEPINM),SHAPE(FALD%RLEPINM) +DO JM=0,RALD%NMSMAX + DO JN=1,IKNTMP(JM) + ZLEPDIM=-((REAL(JM,JPRB)**2)*(GALD%EXWN**2)+& + & (REAL(JN,JPRB)**2)*(GALD%EYWN**2)) + FALD%RLEPINM(DALD%NPME(JM)+JN)=1./ZLEPDIM + ENDDO +ENDDO +DO JM=1,RALD%NMSMAX + ZLEPDIM=-(REAL(JM,JPRB)**2)*(GALD%EXWN**2) + FALD%RLEPINM(DALD%NPME(JM))=1./ZLEPDIM +ENDDO +FALD%RLEPINM(DALD%NPME(0))=0. + +D%NUMPP(:) = 0 +ISPEC(:) = 0 +DALD%NESM0(:)=-99 + +IMDIM = 0 +IL = 1 +IND = 1 +IK = 0 +IPOS = 1 +DO JM=0,RALD%NMSMAX + IK = IK + IND + IF (IK > NPRTRW) THEN + IK = NPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + + IKM =DALD%NCPL2M(JM)/2 -1 + D%NPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+IKM+1 + D%NUMPP(IK) = D%NUMPP(IK)+1 + IF (IK == MYSETW) THEN + IMDIM = IMDIM + IKM+1 + IMYMS(IL) = JM + DALD%NESM0(JM) = IPOS + IPOS = IPOS+(IKM+1)*4 + IL = IL+1 + ENDIF +ENDDO +D%NPOSSP(1) = 1 +ISPEC2P = 4*ISPEC(1) +D%NSPEC2MX = ISPEC2P +DO JA=2,NPRTRW + D%NPOSSP(JA) = D%NPOSSP(JA-1)+ISPEC2P + ISPEC2P = 4*ISPEC(JA) + D%NSPEC2MX=MAX(D%NSPEC2MX,ISPEC2P) +ENDDO +D%NPOSSP(NPRTRW+1) = D%NPOSSP(NPRTRW)+ISPEC2P + +D%NSPEC2 = 4*IMDIM +D%NSPEC=D%NSPEC2 + +D%NUMP = D%NUMPP (MYSETW) +ALLOCATE(D%MYMS(D%NUMP)) +IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) +D%MYMS(:) = IMYMS(1:D%NUMP) +D%NUMTP = D%NUMP + +! pointer to the first wave number of a given wave-set in NALLMS array +ALLOCATE(D%NPTRMS(NPRTRW)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) +D%NPTRMS(:) = 1 +DO JA=2,NPRTRW + D%NPTRMS(JA) = D%NPTRMS(JA-1)+D%NUMPP(JA-1) +ENDDO +! D%NALLMS : wave numbers for all wave-set concatenated together to give all +! wave numbers in wave-set order. +ALLOCATE(D%NALLMS(RALD%NMSMAX+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) +IC(:) = 0 +DO JM=0,RALD%NMSMAX + D%NALLMS(IC(D%NPROCM(JM))+D%NPTRMS(D%NPROCM(JM))) = JM + IC(D%NPROCM(JM)) = IC(D%NPROCM(JM))+1 +ENDDO +ALLOCATE(D%NDIM0G(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) +IPOS = 1 +DO JA=1,NPRTRW + DO JMLOC=1,D%NUMPP(JA) + IM = D%NALLMS(D%NPTRMS(JA)+JMLOC-1) + D%NDIM0G(IM) = IPOS + IPOS = IPOS+2*DALD%NCPL2M(IM) + ENDDO +ENDDO + +ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) +ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + +D%NLATLS(:,:) = 9999 +D%NLATLE(:,:) = -1 + +ILATPP = R%NDGL/NPRTRW +IRESTL = R%NDGL-NPRTRW*ILATPP +DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JA-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JA-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF +ENDDO +ILAST=0 +DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) +ENDDO +IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO +ENDIF + +ALLOCATE(D%NPMT(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) +ALLOCATE(D%NPMS(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) +ALLOCATE(D%NPMG(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) +IDT = R%NTMAX-R%NSMAX +INM = 0 +DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + + INM = INM+R%NTMAX+2-IMLOC +ENDDO +INM = 0 +DO JM=0,R%NSMAX + + INM = INM+R%NTMAX+2-JM +ENDDO + +D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS_PRELEG +END MODULE SUEMP_TRANS_PRELEG_MOD diff --git a/src/etrans/gpu/internal/suestaonl_mod.F90 b/src/etrans/gpu/internal/suestaonl_mod.F90 new file mode 100644 index 000000000..3d48997f8 --- /dev/null +++ b/src/etrans/gpu/internal/suestaonl_mod.F90 @@ -0,0 +1,450 @@ +MODULE SUESTAONL_MOD +CONTAINS +SUBROUTINE SUESTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUESTAONL * - Routine to initialize parallel environment, TAL + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUESTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted +! distribution +! PMEDIAP -mean weight per PE if weighted +! distribution +! KPROCAGP -number of grid points per A set +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! 03-03-03 G. Radnoti: no merge: only difference with +! sustaonl: ezone added to last a-set +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! A.Bogatchev Sep-2010 phasing for AL37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! R. El Khatib 26-Apr-2018 vectorization +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC +USE TPMALD_DIM ,ONLY : RALD +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_NS, N_REGIONS_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRD),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRD),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) + +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, & + & IGL, IGL1, IGL2, IGLOFF, IGPTA, & + & IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + & ILSEND, INPLAT, INXLAT, IPOS, & + & IPROCB, IPTSRE, IRECV, & + & IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + & ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRB) :: ZLAT, ZLAT1(R%NDGL), ZCOMP +REAL(KIND=JPRB) :: ZDIVID(R%NDGL),ZXPTLAT(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ----------------------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',0,ZHOOK_HANDLE) +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ENDIF + ELSE + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ELSE + IGPTS = KMEDIAP-1+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*KRESTM+(KMEDIAP-1)*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ZXPTLAT(1) = REAL(IXPTLAT(1)) +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ZXPTLAT(JGL) = 1.0_JPRB + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + +! grid point decomposition +! --------------------------------------- +DO JGL=1,ILEN + ZDIVID(JGL)=1._JPRB/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRB) +ENDDO +IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 +ENDIF +DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + + DO JNPTSRE=1,IPTSRE + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT=ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ELSE + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT = ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + +ENDDO + +IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) +ENDIF +! Exchange local partitioning info to produce global view + +IF( NPROC > 1 )THEN + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUESTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUESTAONL:') + ENDIF + ENDDO + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUESTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " row=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + WRITE(0,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " ROW=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUESTAONL : row=",i4," sta=",i4,& + & " GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUESTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUESTAONL: inconsistent partitioning') +ENDIF + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUESTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I3))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," NSTA=",& + & 32(1X,I3))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," D%NONL=",& + & 32(1X,I3))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE SUESTAONL +END MODULE SUESTAONL_MOD diff --git a/src/etrans/gpu/internal/tpmald_fields.F90 b/src/etrans/gpu/internal/tpmald_fields.F90 new file mode 100644 index 000000000..9cb65aef5 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_fields.F90 @@ -0,0 +1,19 @@ +MODULE TPMALD_FIELDS + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFIELDS_TYPE + +REAL(KIND=JPRB) ,POINTER :: RLEPINM(:) ! eigen-values of the inverse Laplace operator +END TYPE ALDFIELDS_TYPE + +TYPE(ALDFIELDS_TYPE),ALLOCATABLE,TARGET :: ALDFIELDS_RESOL(:) +TYPE(ALDFIELDS_TYPE),POINTER :: FALD + +REAL(KIND=JPRB) ,ALLOCATABLE :: FALD_RLEPINM(:) + +END MODULE TPMALD_FIELDS diff --git a/src/etrans/gpu/internal/tpmald_geo.F90 b/src/etrans/gpu/internal/tpmald_geo.F90 new file mode 100644 index 000000000..111e618f2 --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_geo.F90 @@ -0,0 +1,22 @@ +MODULE TPMALD_GEO + +! Module containing data describing plane projection grid. + +USE PARKIND1 ,ONLY : JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDGEO_TYPE + +! GEOGRAPHY + +REAL(KIND=JPRB) :: EYWN ! Y-reso +REAL(KIND=JPRB) :: EXWN ! X-reso +END TYPE ALDGEO_TYPE + +TYPE(ALDGEO_TYPE),ALLOCATABLE,TARGET :: ALDGEO_RESOL(:) +TYPE(ALDGEO_TYPE),POINTER :: GALD + +END MODULE TPMALD_GEO diff --git a/src/etrans/gpu/internal/tpmald_tcdis.F90 b/src/etrans/gpu/internal/tpmald_tcdis.F90 new file mode 100644 index 000000000..2ba567b0a --- /dev/null +++ b/src/etrans/gpu/internal/tpmald_tcdis.F90 @@ -0,0 +1,13 @@ +MODULE TPMALD_TCDIS + +! useless + +USE PARKIND1 ,ONLY : JPRB + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRB) :: TCDIS + +END MODULE TPMALD_TCDIS diff --git a/src/etrans/sedrenames.txt b/src/etrans/sedrenames.txt index 2e83252b3..b49243ba5 100644 --- a/src/etrans/sedrenames.txt +++ b/src/etrans/sedrenames.txt @@ -13,7 +13,6 @@ s/DIST_GRID_CTL_MOD/DIST_GRID_CTL_MOD_VARIANTDESIGNATOR/g s/edist_grid( *($|\(| |\*))/edist_grid_VARIANTDESIGNATOR\1/g s/EDIST_GRID( *($|\(| |\*))/EDIST_GRID_VARIANTDESIGNATOR\1/g s/DIST_SPEC_CONTROL_MOD/DIST_SPEC_CONTROL_MOD_VARIANTDESIGNATOR/g -s/EDIST_SPEC_CONTROL_MOD/EDIST_SPEC_CONTROL_MOD_VARIANTDESIGNATOR/g s/edist_spec( *($|\(| |\*))/edist_spec_VARIANTDESIGNATOR\1/g s/EDIST_SPEC( *($|\(| |\*))/EDIST_SPEC_VARIANTDESIGNATOR\1/g s/FOURIER_IN_MOD/FOURIER_IN_MOD_VARIANTDESIGNATOR/g @@ -43,6 +42,8 @@ s/EGATH_SPEC( *($|\(| |\*))/EGATH_SPEC_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS_CTL_MOD/GPNORM_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/egpnorm_trans( *($|\(| |\*))/egpnorm_trans_VARIANTDESIGNATOR\1/g s/EGPNORM_TRANS( *($|\(| |\*))/EGPNORM_TRANS_VARIANTDESIGNATOR\1/g +s/gpnorm_trans( *($|\(| |\*))/gpnorm_trans_VARIANTDESIGNATOR\1/g +s/GPNORM_TRANS( *($|\(| |\*))/GPNORM_TRANS_VARIANTDESIGNATOR\1/g s/EINV_TRANS_CTL_MOD/EINV_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/EINV_TRANS_CTLAD_MOD/EINV_TRANS_CTLAD_MOD_VARIANTDESIGNATOR/g s/einv_trans( *($|\(| |\*))/einv_trans_VARIANTDESIGNATOR\1/g @@ -80,15 +81,12 @@ s/EPRFI2AD_MOD/EPRFI2AD_MOD_VARIANTDESIGNATOR/g s/EPRFI2B_MOD/EPRFI2B_MOD_VARIANTDESIGNATOR/g s/EPRFI2BAD_MOD/EPRFI2BAD_MOD_VARIANTDESIGNATOR/g s/ESET_RESOL_MOD/ESET_RESOL_MOD_VARIANTDESIGNATOR/g -s/ESETUP_DIMS_MOD/ESETUP_DIMS_MOD_VARIANTDESIGNATOR/g -s/ESETUP_GEOM_MOD/ESETUP_GEOM_MOD_VARIANTDESIGNATOR/g s/ESETUP_TRANS( *($|\(| |\*))/ESETUP_TRANS_VARIANTDESIGNATOR\1/g s/esetup_trans( *($|\(| |\*|\.h))/esetup_trans_VARIANTDESIGNATOR\1/g s/especnorm/especnorm_VARIANTDESIGNATOR/g s/ESPECNORM/ESPECNORM_VARIANTDESIGNATOR/g s/SPNORM_CTL_MOD/SPNORM_CTL_MOD_VARIANTDESIGNATOR/g s/SPNORMC_MOD/SPNORMC_MOD_VARIANTDESIGNATOR/g -s/ESPNORMC_MOD/ESPNORMC_MOD_VARIANTDESIGNATOR/g s/ESPNORMD_MOD/ESPNORMD_MOD_VARIANTDESIGNATOR/g s/ESPNSDE_MOD/ESPNSDE_MOD_VARIANTDESIGNATOR/g s/ESPNSDEAD_MOD/ESPNSDEAD_MOD_VARIANTDESIGNATOR/g @@ -97,8 +95,6 @@ s/ETIBIHIE/ETIBIHIE_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g s/TPM_FLT/TPM_FLT_VARIANTDESIGNATOR/g s/TPM_TRANS/TPM_TRANS_VARIANTDESIGNATOR/g -s/TPMALD_DIM/TPMALD_DIM_VARIANTDESIGNATOR/g -s/TPMALD_DISTR/TPMALD_DISTR_VARIANTDESIGNATOR/g s/TPMALD_FFT/TPMALD_FFT_VARIANTDESIGNATOR/g s/TPMALD_FIELD/TPMALD_FIELD_VARIANTDESIGNATOR/g s/TPMALD_GEO/TPMALD_GEO_VARIANTDESIGNATOR/g @@ -112,7 +108,9 @@ s/etrans_release( *($|\(| |\*|\.h))/etrans_release_VARIANTDESIGNATOR\1/g s/ETRANS_RELEASE/ETRANS_RELEASE_VARIANTDESIGNATOR/g s/TRLTOG_MOD/TRLTOG_MOD_VARIANTDESIGNATOR/g s/TRLTOM_MOD/TRLTOM_MOD_VARIANTDESIGNATOR/g +s/TRLTOM_PACK_UNPACK/TRLTOM_PACK_UNPACK_VARIANTDESIGNATOR/g s/TRMTOL_MOD/TRMTOL_MOD_VARIANTDESIGNATOR/g +s/TRMTOL_PACK_UNPACK/TRMTOL_PACK_UNPACK_VARIANTDESIGNATOR/g s/EUPDSP_MOD/EUPDSP_MOD_VARIANTDESIGNATOR/g s/EUPDSPAD_MOD/EUPDSPAD_MOD_VARIANTDESIGNATOR/g s/EUPDSPB_MOD/EUPDSPB_MOD_VARIANTDESIGNATOR/g @@ -132,6 +130,4 @@ s/EVDTUVAD_COMM_MOD/EVDTUVAD_COMM_MOD_VARIANTDESIGNATOR/g s/SUEFFT_MOD/SUEFFT_MOD_VARIANTDESIGNATOR/g s/SUEMP_TRANS_MOD/SUEMP_TRANS_MOD_VARIANTDESIGNATOR/g s/SUEMP_TRANS_PRELEG_MOD/SUEMP_TRANS_PRELEG_MOD_VARIANTDESIGNATOR/g -s/SUEMPLATB_MOD/SUEMPLATB_MOD_VARIANTDESIGNATOR/g -s/SUEMPLAT_MOD/SUEMPLAT_MOD_VARIANTDESIGNATOR/g s/SUESTAONL_MOD/SUESTAONL_MOD_VARIANTDESIGNATOR/g diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index f507a8174..ec70ba658 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -64,20 +64,59 @@ foreach( program ectrans-benchmark ) endforeach( program ) if( HAVE_ETRANS ) - foreach( prec sp dp ) - if( HAVE_${prec} ) - ecbuild_add_executable( TARGET ectrans-lam-benchmark-cpu-${prec} - SOURCES ectrans-lam-benchmark.F90 - LINKER_LANGUAGE Fortran - LIBS - fiat - parkind_${prec} - trans_${prec} - etrans_${prec} - OpenMP::OpenMP_Fortran - ) + foreach( program ectrans-lam-benchmark ) + + if ( HAVE_CPU ) + foreach( prec dp sp ) + if( HAVE_${prec} ) + ecbuild_add_executable( TARGET ${program}-cpu-${prec} + SOURCES ${program}.F90 + LINKER_LANGUAGE Fortran + LIBS + fiat + parkind_${prec} + trans_${prec} + etrans_${prec} + $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> + DEFINITIONS + VERSION="cpu" + ) + ectrans_target_fortran_module_directory(TARGET ${program}-cpu-${prec} MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/module/${program}-cpu-${prec} ) + endif() + endforeach( prec) endif() - endforeach() + + if( HAVE_GPU AND TRUE ) + list( APPEND util_PRIVATE_DEFINITIONS + $<${HAVE_CUDA}:CUDA> + $<${HAVE_HIP}:HIP> + ) + list( APPEND util_PRIVATE_LIBRARIES + $<${HAVE_CUDA}:CUDA::cudart> + $<${HAVE_HIP}:hip::host> + ) + foreach( prec dp sp ) + if( HAVE_${prec} ) + ecbuild_add_executable( TARGET ${program}-gpu-${prec} + SOURCES ${program}.F90 + LINKER_LANGUAGE Fortran + LIBS + fiat + parkind_${prec} + trans_gpu_${prec} + etrans_gpu_${prec} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> + ${util_PRIVATE_LIBRARIES} + DEFINITIONS + VERSION="gpu" + ${util_PRIVATE_DEFINITIONS} + ) + ectrans_target_fortran_module_directory(TARGET ${program}-gpu-${prec} MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/module/${program}-gpu-${prec} ) + endif() + endforeach( prec ) + endif( ) + endforeach( program ) endif() # ectrans information tool diff --git a/src/programs/ectrans-lam-benchmark.F90 b/src/programs/ectrans-lam-benchmark.F90 index bb8201a33..d986e2b7e 100644 --- a/src/programs/ectrans-lam-benchmark.F90 +++ b/src/programs/ectrans-lam-benchmark.F90 @@ -101,6 +101,9 @@ program ectrans_lam_benchmark real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt +real(kind=jprb), allocatable :: zgp3a_ctg (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable :: zgpuv_ctg (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable :: zgp2_ctg (:,:,:) ! Single level fields at t and t-dt ! Spectral space data structures real(kind=jprb), allocatable, target :: sp3d(:,:,:) @@ -207,6 +210,7 @@ program ectrans_lam_benchmark #include "esetup_trans.h" #include "einv_trans.h" #include "edir_trans.h" +#include "etrans_end.h" #include "etrans_inq.h" #include "especnorm.h" #include "abor1.intfb.h" @@ -556,12 +560,12 @@ program ectrans_lam_benchmark !================================================================================================= if (ldump_values) then - ! dump a field to a binary file - call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) - if (lvordiv) then - call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1:1), 'D', noutdump) - call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) - endif + ! dump a field to a binary file + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) + if (lvordiv) then + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1:1), 'D', noutdump) + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) + endif call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc3a(1,:,1),ivset(1:1), 'T', noutdump) endif @@ -575,6 +579,7 @@ program ectrans_lam_benchmark !=================================================================================================== do jstep = 1, iters + if( lstats ) call gstats(3,0) ztstep(jstep) = omp_get_wtime() @@ -584,6 +589,7 @@ program ectrans_lam_benchmark ztstep1(jstep) = omp_get_wtime() if( lstats ) call gstats(4,0) + if (lvordiv) then call einv_trans(kresol=1, kproma=nproma, & @@ -643,12 +649,16 @@ program ectrans_lam_benchmark if( lstats ) call gstats(5,0) + ! take local copies to make them contiguous; this is not the case when derivatives are requested and nproma 0) then + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) - + if ( myproc == 1 ) then - zmaxerr(:) = -999.0 - do ifld = 1, nflevg - zerr(3) = abs(real(znormvor(ifld),kind=jprd)/real(znormvor0(ifld),kind=jprd) - 1.0_jprd) - zmaxerr(3) = max(zmaxerr(3), zerr(3)) - if (verbosity >= 1) then - write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor0(ifld), zerr(3) - endif - enddo - do ifld = 1, nflevg - zerr(2) = abs(real(znormdiv(ifld),kind=jprd)/real(znormdiv0(ifld),kind=jprd) - 1.0d0) - zmaxerr(2) = max(zmaxerr(2),zerr(2)) - if (verbosity >= 1) then - write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv0(ifld), zerr(2) - endif - enddo - do ifld = 1, nflevg - zerr(4) = abs(real(znormt(ifld),kind=jprd)/real(znormt0(ifld),kind=jprd) - 1.0d0) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) - if (verbosity >= 1) then - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt0(ifld), zerr(4) - endif - enddo - do ifld = 1, 1 - zerr(1) = abs(real(znormsp(ifld),kind=jprd)/real(znormsp0(ifld),kind=jprd) - 1.0d0) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) - if (verbosity >= 1) then - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp0(ifld), zerr(1) - endif - enddo - - ! maximum error across all fields - zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) - - if (verbosity >= 1) write(nout,*) - write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) - write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) - write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) - write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) - write(nout,*) - write(nout,'("max error combined = = ",e10.3)') zmaxerrg - write(nout,*) - - if (ncheck > 0) then - ! If the maximum spectral norm error across all fields is greater than 100 times the machine - ! epsilon, fail the test - if (zmaxerrg > real(ncheck, jprb) * epsilon(1.0_jprb)) then - write(nout, '(a)') '*******************************' - write(nout, '(a)') 'Correctness test failed' - write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg - write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprb) * epsilon(1.0_jprb) - write(nout, '(a)') '*******************************' - error stop - endif - endif + zmaxerr(:) = -999.0 + do ifld = 1, nflevg + zerr(3) = abs(real(znormvor(ifld),kind=jprd)/real(znormvor0(ifld),kind=jprd) - 1.0_jprd) + zmaxerr(3) = max(zmaxerr(3), zerr(3)) + if (verbosity >= 1) then + write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor0(ifld), zerr(3) + endif + enddo + do ifld = 1, nflevg + zerr(2) = abs(real(znormdiv(ifld),kind=jprd)/real(znormdiv0(ifld),kind=jprd) - 1.0d0) + zmaxerr(2) = max(zmaxerr(2),zerr(2)) + if (verbosity >= 1) then + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv0(ifld), zerr(2) + endif + enddo + do ifld = 1, nflevg + zerr(4) = abs(real(znormt(ifld),kind=jprd)/real(znormt0(ifld),kind=jprd) - 1.0d0) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (verbosity >= 1) then + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt0(ifld), zerr(4) + endif + enddo + do ifld = 1, 1 + zerr(1) = abs(real(znormsp(ifld),kind=jprd)/real(znormsp0(ifld),kind=jprd) - 1.0d0) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + if (verbosity >= 1) then + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp0(ifld), zerr(1) + endif + enddo + + ! maximum error across all fields + zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) + + if (verbosity >= 1) write(nout,*) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) + write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + write(nout,*) + write(nout,'("max error combined = = ",e10.3)') zmaxerrg + write(nout,*) + + if (ncheck > 0) then + ! If the maximum spectral norm error across all fields is greater than 100 times the machine + ! epsilon, fail the test + if (zmaxerrg > real(ncheck, jprb) * epsilon(1.0_jprb)) then + write(nout, '(a)') '*******************************' + write(nout, '(a)') 'Correctness test failed' + write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg + write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprb) * epsilon(1.0_jprb) + write(nout, '(a)') '*******************************' + call flush(nout) + error stop + endif + endif endif endif @@ -889,6 +906,7 @@ program ectrans_lam_benchmark write(nout,'(a)') '======= End of time step stats =======' write(nout,'(" ")') + if (lstack) then ! Gather stack usage statistics istack = getstackusage() @@ -909,11 +927,19 @@ program ectrans_lam_benchmark endif endif + !=================================================================================================== ! Cleanup !=================================================================================================== -! TODO: many more arrays to deallocate +deallocate(nprcids,numll) +deallocate(sp3d,zspsc2,zmeanu,zmeanv) +deallocate(ivset) +deallocate(zgpuv,zgp3a,zgp2) +if ( allocated(znormsp) ) deallocate(znormsp,znormsp0,znormvor,znormvor0,znormdiv,znormdiv0,znormt,znormt0) +deallocate(ztstep,ztstep1,ztstep2) + +call etrans_end() !=================================================================================================== @@ -936,6 +962,7 @@ program ectrans_lam_benchmark call mpl_end(ldmeminfo=.false.) endif + !=================================================================================================== ! Close file !=================================================================================================== @@ -1235,11 +1262,11 @@ subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) integer, allocatable :: my_km(:), my_kn(:) ! Choose a harmonic to initialize arrays - integer :: m_num = 1 ! Zonal wavenumber + integer :: m_num = 3 ! Zonal wavenumber integer :: n_num = 0 ! Meridional wavenumber ! Type of initialization: (single) 'harmonic' or (random) 'spectrum' - character(len=32) :: init_type='harmonic' + character(len=32) :: init_type='spectrum' ! First initialise all spectral coefficients to zero field(:) = 0.0 @@ -1261,7 +1288,7 @@ subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) call etrans_inq(kspec2=kspec2) allocate(my_kn(kspec2),my_km(kspec2)) call etrans_inq(knvalue=my_kn,kmvalue=my_km) - + ! If rank is responsible for the chosen zonal wavenumber... if ( init_type == 'harmonic' ) then do ispec=1,nspec2,4 @@ -1281,10 +1308,10 @@ subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) ! set some components to zero because they are unphysical do ispec=1,nspec2,4 if ( my_kn(ispec)== 0 .and. my_km(ispec) == 0 ) field(ispec:ispec+3)=0. ! remove mean value for vorticity and divergence - if ( my_kn(ispec)== 0 ) field(ispec+1:ispec+3:2)=0. ! remove sine component on zero-wavenumber - if ( my_kn(ispec)== nmsmax ) field(ispec+1:ispec+3:2)=0. ! remove sine component on last-wavenumber - if ( my_km(ispec)== 0 ) field(ispec+2:ispec+3)=0. ! remove sine component on zero-wavenumber - if ( my_km(ispec)== nsmax ) field(ispec+2:ispec+3)=0. ! remove sine component on last-wavenumber + if ( my_kn(ispec)== 0 ) field(ispec+1:ispec+3:2)=0. ! remove sine component on zero meridional wavenumber + !if ( my_kn(ispec)== nsmax ) field(ispec+1:ispec+3:2)=0. ! remove sine component on last meridional wavenumber -- must only be zero when nsmax==nlat/2 + if ( my_km(ispec)== 0 ) field(ispec+2:ispec+3)=0. ! remove sine component on zero zonal wavenumber + !if ( my_km(ispec)== nmsmax ) field(ispec+2:ispec+3)=0. ! remove sine component on last meridional wavenumber -- must only be zero when nmsmax==nlon/2 enddo ! scale according to wavenumber**2 @@ -1293,6 +1320,8 @@ subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) enddo endif + deallocate(my_kn,my_km) + end subroutine initialize_2d_spectral_field !=================================================================================================== @@ -1338,7 +1367,7 @@ subroutine dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, fld, fldch #endif open(noutdump, file=filename, form="unformatted", access="stream") write(noutdump) kgptotg/nlat,nlat ! dimensions - write(noutdump) fldg ! data + write(noutdump) fldg ! data close(noutdump) ! write to screen @@ -1346,7 +1375,7 @@ subroutine dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, fld, fldch write (*,*) fldchar,' at iteration ',jstep,':' write (*,frmt) fldg call flush(6) - + deallocate(fldg) endif @@ -1386,25 +1415,25 @@ subroutine dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, fld, kvset, if ( myproc == 1 ) then call etrans_inq(kspec2g=nspec2g) allocate(fldg(1,nspec2g)) - call ellips(nsmax,nmsmax,knse,kmse) + call ellips(nsmax,nmsmax,knse,kmse) endif call egath_spec(PSPECG=fldg,kfgathg=kfgathg,kto=kto,kvset=kvset,PSPEC=fld) if ( myproc == 1 ) then - fld2g=0. - jj=1 - do jms=0,nmsmax - do jns=0,knse(jms) - fld2g(2*jms+0,2*jns+0)=fldg(1,jj) - fld2g(2*jms+0,2*jns+1)=fldg(1,jj+1) - fld2g(2*jms+1,2*jns+0)=fldg(1,jj+2) - fld2g(2*jms+1,2*jns+1)=fldg(1,jj+3) - jj=jj+4 - enddo - enddo - + fld2g=0. + jj=1 + do jms=0,nmsmax + do jns=0,knse(jms) + fld2g(2*jms+0,2*jns+0)=fldg(1,jj) + fld2g(2*jms+0,2*jns+1)=fldg(1,jj+1) + fld2g(2*jms+1,2*jns+0)=fldg(1,jj+2) + fld2g(2*jms+1,2*jns+1)=fldg(1,jj+3) + jj=jj+4 + enddo + enddo + ! write to binary file write(filename(1:1),'(a1)') fldchar write(filename(3:5),'(i3.3)') jstep @@ -1414,7 +1443,7 @@ subroutine dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, fld, kvset, write(filename(7:9),'(a3)') 'cpu' #endif open(noutdump, file=filename, form="unformatted", access="stream") - write(noutdump) 2*nmsmax+2,2*nsmax+2 ! dimensions + write(noutdump) 2*nmsmax+2,2*nsmax+2 ! dimensions write(noutdump) fld2g ! data close(noutdump) @@ -1423,7 +1452,7 @@ subroutine dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, fld, kvset, write (*,*) fldchar,' at iteration ',jstep,':' write (*,frmt) fld2g call flush(6) - + deallocate(fldg) endif diff --git a/src/trans/gpu/algor/hicfft.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp index 56a8e52e3..262014ee8 100644 --- a/src/trans/gpu/algor/hicfft.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -154,8 +154,8 @@ std::vector> plan_all(int resol_id, int kfield, int int embed[] = {1}; fftSafeCall(hipfftPlanMany( &plan, 1, &nloen, embed, 1, is_forward ? dist : dist / 2, embed, 1, - is_forward ? dist / 2 : dist, Direction, kfield)); - newPlans.emplace_back(plan, kfield * offsets[i]); + is_forward ? dist / 2 : dist, Direction, abs(kfield))); + newPlans.emplace_back(plan, abs(kfield) * offsets[i]); } fftPlansCache.insert({key, newPlans}); } diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 9d8196cde..10155d1bc 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -86,7 +86,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) TYPE(TRLTOM_PACK_HANDLE), INTENT(IN) :: HTRLTOM_PACK INTEGER(KIND=JPIM) :: JM,JF,IGLG,OFFSET_VAR,KGL - INTEGER(KIND=JPIB) :: IOFF_LAT,ISTA + INTEGER(KIND=JPIB) :: IOFF_LAT,ISTA, NMEN_MAX REAL(KIND=JPRBT) :: SCAL @@ -107,6 +107,8 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) ! scale results and move into next transformation buffer OFFSET_VAR=D_NPTRLS(MYSETW) + + NMEN_MAX=MAXVAL(G_NMEN) #ifdef OMPGPU !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IGLG,IOFF_LAT,ISTA,SCAL) & @@ -123,7 +125,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) #endif #endif DO KGL=1,D_NDGL_FS - DO JM=0,R_NSMAX !(note that R_NSMAX <= G_NMEN(IGLG) for all IGLG) + DO JM=0,NMEN_MAX DO JF=1,KF_FS IGLG = OFFSET_VAR+KGL-1 IF (JM <= G_NMEN(IGLG)) THEN diff --git a/src/transi/CMakeLists.txt b/src/transi/CMakeLists.txt index c320e0fb1..b254646e1 100644 --- a/src/transi/CMakeLists.txt +++ b/src/transi/CMakeLists.txt @@ -12,6 +12,13 @@ endif() configure_file( version.c.in version.c ) +set(transi_PRIVATE_LIBRARIES trans_dp) +if( HAVE_ETRANS ) + list( APPEND transi_PRIVATE_LIBRARIES + etrans_dp + ) +endif() + ecbuild_add_library( TARGET transi_dp SOURCES transi_module.F90 transi.h @@ -21,7 +28,7 @@ ecbuild_add_library( TARGET transi_dp HEADER_DESTINATION include/ectrans PUBLIC_INCLUDES $ $ - PRIVATE_LIBS trans_dp + PRIVATE_LIBS ${transi_PRIVATE_LIBRARIES} PRIVATE_DEFINITIONS ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ECTRANS_HAVE_ETRANS=${ectrans_HAVE_ETRANS} )