diff --git a/.github/tools/install-amd-flang.sh b/.github/tools/install-amd-flang.sh
new file mode 100755
index 000000000..11531fd2e
--- /dev/null
+++ b/.github/tools/install-amd-flang.sh
@@ -0,0 +1,113 @@
+#!/bin/sh
+
+# Install amdflang
+#
+# Looks up the "rocm-afar" drop matching the requested version in the
+# listing at https://repo.radeon.com/rocm/misc/flang/, downloads its Ubuntu
+# tarball, unpacks it below the install prefix and writes an env.sh there
+# which puts the compilers on PATH and the libraries on LD_LIBRARY_PATH.
+#
+# Options:
+#   --prefix DIR      install prefix (default: $(pwd)/amdflang-install)
+#   --tmpdir DIR      download directory (default: ${TMPDIR:-/tmp})
+#   --version VERSION drop version to install (default: 7.0.5)
+#   --verbose         export AMDFLANG_SILENT=false instead of true
+#
+# Originally written for Squash <https://github.com/quixdb/squash> by
+# Evan Nemerson.  For documentation, bug reports, support requests,
+# etc. please use <https://github.com/nemequ/build-tools/>.
+#
+# To the extent possible under law, the author(s) of this script have
+# waived all copyright and related or neighboring rights to this work.
+# See <https://creativecommons.org/publicdomain/zero/1.0/> for
+# details.
+
+# Stop at the first failing command so that a failed download or unpack is
+# reported here instead of surfacing later as a missing compiler
+set -e
+
+VERSION=7.0.5
+INDEX_URL=https://repo.radeon.com/rocm/misc/flang/
+
+TEMPORARY_FILES="${TMPDIR:-/tmp}"
+export AMDFLANG_INSTALL_DIR="$(pwd)/amdflang-install"
+export AMDFLANG_SILENT=true
+while [ $# != 0 ]; do
+    case "$1" in
+    "--prefix")
+        export AMDFLANG_INSTALL_DIR="$2"; shift
+        ;;
+    "--tmpdir")
+        TEMPORARY_FILES="$2"; shift
+        ;;
+    "--verbose")
+        export AMDFLANG_SILENT=false;
+        ;;
+    "--version")
+        VERSION="$2"; shift
+        ;;
+    *)
+        echo "Unrecognized argument '$1'"
+        exit 1
+        ;;
+    esac
+    shift
+done
+
+# Example download URL for version 6.0.0
+# https://repo.radeon.com/rocm/misc/flang/rocm-afar-7450-drop-6.0.0-ubu.tar.bz2
+
+# Fetch the directory listing once; both lookups below read from it
+INDEX=$(curl -fsS "${INDEX_URL}")
+
+# Get name of drop based on version. The version is matched as a fixed
+# string so that its dots are not treated as regex wildcards.
+BASENAME=$(printf '%s\n' "${INDEX}" | \
+    grep -oE "rocm-afar-[1-9][0-9]*-drop-[0-9]+\.[0-9]+\.[0-9]+" | \
+    grep -F -e "-drop-${VERSION}" | sort | tail -1)
+if [ -z "${BASENAME}" ]; then
+    echo "No amdflang drop found for version '${VERSION}' at ${INDEX_URL}" >&2
+    exit 1
+fi
+
+# Get file name of the Ubuntu tarball; both the "-ubu" suffix of the example
+# above and the "-ubuntu" suffix are accepted
+URL_SHORT=$(printf '%s\n' "${INDEX}" | \
+    grep -oE "${BASENAME}-ubu(ntu)?\.tar\.bz2" | sort | tail -1)
+if [ -z "${URL_SHORT}" ]; then
+    echo "No Ubuntu tarball found for ${BASENAME} at ${INDEX_URL}" >&2
+    exit 1
+fi
+URL=${INDEX_URL}${URL_SHORT}
+
+if [ ! -f "${TEMPORARY_FILES}/${URL_SHORT}" ]; then
+    echo "Downloading [${URL}]"
+    wget -q -P "${TEMPORARY_FILES}" "${URL}"
+else
+    echo "Download already present in ${TEMPORARY_FILES}"
+fi
+
+if [ ! -d "${AMDFLANG_INSTALL_DIR}/${BASENAME}" ]; then
+    mkdir -p "${AMDFLANG_INSTALL_DIR}"
+    tar xjf "${TEMPORARY_FILES}/${URL_SHORT}" -C "${AMDFLANG_INSTALL_DIR}"
+else
+    echo "Install already present in ${AMDFLANG_INSTALL_DIR}"
+fi
+
+echo "ls ${AMDFLANG_INSTALL_DIR}"
+ls "${AMDFLANG_INSTALL_DIR}"
+
+# Escaped variables are expanded when env.sh is sourced, the others now.
+# LD_LIBRARY_PATH is extended rather than replaced; the ":+" expansion keeps
+# env.sh usable under "set -u" when the variable was not set beforehand.
+cat > "${AMDFLANG_INSTALL_DIR}/env.sh" << EOF
+### Variables
+export AMDFLANG_INSTALL_DIR=${AMDFLANG_INSTALL_DIR}/${BASENAME}
+export AMDFLANG_VERSION=${VERSION}
+
+### Compilers
+export PATH=\${AMDFLANG_INSTALL_DIR}/bin:\${PATH}
+export LD_LIBRARY_PATH=\${AMDFLANG_INSTALL_DIR}/llvm/lib:\${AMDFLANG_INSTALL_DIR}/lib\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}
+EOF
+
+cat "${AMDFLANG_INSTALL_DIR}/env.sh"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2bbb436e6..772f1a2b6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -35,6 +35,7 @@ jobs:
           - linux clang-18
           - linux nvhpc-25.1
           - linux intel-classic
+          - linux amd-flang-7.0.5
           - macos
 
         include:
@@ -67,7 +68,7 @@ jobs:
             ctest_options: -E memory
             caching: false
 
-          - name : linux intel-classic
+          - name: linux intel-classic
             os: ubuntu-22.04
             compiler: intel-classic
             compiler_cc: icc
@@ -75,6 +76,14 @@ jobs:
             compiler_fc: ifort
             caching: true
 
+          - name: linux amd-flang-7.0.5
+            os: ubuntu-24.04
+            compiler: amd-flang-7.0.5
+            compiler_cc: amdclang
+            compiler_cxx: amdclang++
+            compiler_fc: amdflang
+            cmake_options: -DCMAKE_Fortran_FLAGS_INIT='-fPIC -fdynamic-heap-array' -DCMAKE_CXX_FLAGS_INIT=-fPIC -DCMAKE_C_FLAGS_INIT=-fPIC -DENABLE_MPI=OFF -DOpenMP_Fortran_FLAGS=-fopenmp -DOpenMP_CXX_FLAGS=-fopenmp -DOpenMP_C_FLAGS=-fopenmp
+
           - name: macos
             # Xcode compiler requires empty environment variables, so we pass null (~) here
             os: macos-13
@@ -150,7 +159,17 @@ jobs:
         printenv >> $GITHUB_ENV
         echo "CACHE_SUFFIX=$CC-$($CC -dumpversion)" >> $GITHUB_ENV
 
+    - name: Install AMD flang compiler
+      if: contains( matrix.compiler, 'amd-flang' )
+      shell: bash -eux {0}
+      run: |
+        ${ECTRANS_TOOLS}/install-amd-flang.sh --prefix /opt/amd-flang
+        source /opt/amd-flang/env.sh
+        echo "${AMDFLANG_INSTALL_DIR}/bin" >> $GITHUB_PATH
+        echo "LD_LIBRARY_PATH=${AMDFLANG_INSTALL_DIR}/lib:${AMDFLANG_INSTALL_DIR}/llvm/lib" >> $GITHUB_ENV
+
     - name: Install MPI
+      if: ${{ !contains( matrix.compiler, 'amd-flang' ) }}
       shell: bash -eux {0}
       run: |
         FCFLAGS=-fPIC CFLAGS=-fPIC FFLAGS=-fPIC ${ECTRANS_TOOLS}/install-mpi.sh --mpi openmpi --prefix ${DEPS_DIR}/openmpi
@@ -187,8 +206,8 @@ jobs:
           ecmwf-ifs/fiat
         dependency_branch: develop
         dependency_cmake_options: |
-          ecmwf-ifs/fiat: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_MPI=ON"
-        cmake_options: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.cmake_options }} -DENABLE_MPI=ON -DENABLE_FFTW=ON -DENABLE_ETRANS=ON"
+          ecmwf-ifs/fiat: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_MPI=ON ${{ matrix.cmake_options }}"
+        cmake_options: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_MPI=ON -DENABLE_FFTW=ON -DENABLE_ETRANS=ON ${{ matrix.cmake_options }}"
         ctest_options: "${{ matrix.ctest_options }}"
 
     - name: Verify tools
diff --git a/tests/test-install.sh.in b/tests/test-install.sh.in
index 82660b1e9..00e6875e8 100755
--- a/tests/test-install.sh.in
+++ b/tests/test-install.sh.in
@@ -49,6 +49,10 @@ cmake $SOURCE \
 
 make VERBOSE=1
 
+if [ "@CMAKE_Fortran_COMPILER_ID@" = "LLVMFlang" ]; then
+  ulimit -s unlimited
+fi
+
 if [ -f bin/main_dp ]; then
   bin/main_dp
 fi
diff --git a/tests/trans/test_dirtrans_adjoint.F90 b/tests/trans/test_dirtrans_adjoint.F90
index 24c2ee763..0d47f5f6c 100644
--- a/tests/trans/test_dirtrans_adjoint.F90
+++ b/tests/trans/test_dirtrans_adjoint.F90
@@ -162,6 +162,11 @@ PROGRAM TEST_DIRTRANS_ADJOINT
 CALL DIST_SPEC(PSPECG=ZVORXG, KFDISTG=JP_NUMLEVELS_G, KFROM=ITOSP, PSPEC=ZVORX, KVSET=IVSET)
 CALL DIST_SPEC(PSPECG=ZDIVXG, KFDISTG=JP_NUMLEVELS_G, KFROM=ITOSP, PSPEC=ZDIVX, KVSET=IVSET)
 
+! We don't need these anymore
+DEALLOCATE(ZSPECXG)
+DEALLOCATE(ZVORXG)
+DEALLOCATE(ZDIVXG)
+
 ! ===== Allocate and initialize gridpoint data =====
 ALLOCATE(ZGXG(IGPTOTG,3*JP_NUMLEVELS_G))
 ALLOCATE(ZGX(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS))
@@ -187,6 +192,11 @@ PROGRAM TEST_DIRTRANS_ADJOINT
 ADJ_VALUE_1 = ADJ_VALUE_1 + SCALPRODSP(ZVORX, ZVORY)
 ADJ_VALUE_1 = ADJ_VALUE_1 + SCALPRODSP(ZDIVX, ZDIVY)
 
+! We don't need these anymore
+DEALLOCATE(ZSPECY)
+DEALLOCATE(ZVORY)
+DEALLOCATE(ZDIVY)
+
 ! ===== Compute dirtrans_adj and gather result on proc 1 =====
 ! i.e. dirtrans_adj(rspscalarx, rspvorx, rspdivx) = rgpy
 
@@ -196,13 +206,16 @@ PROGRAM TEST_DIRTRANS_ADJOINT
 
 ! ===== Compute: adj_value2 = =====
 ! i.e. adj_value2 =
-ADJ_VALUE_2 = SCALPRODGP(ZGY, ZGX)
+ZGX = ZGX * ZGY
+CALL GATH_GRID(ZGXG, JPPROMA, 3*JP_NUMLEVELS_G, ITOGP, PGP=ZGX)
 
 ! Only task 1 should perform the correctness check
 IF (IMYPROC == 1) THEN
   ! ===== Compare inner products =====
   ! i.e. ==
 
+  ADJ_VALUE_2 = SUM(ZGXG)
+
   ZRELATIVE_ERROR = ABS(ADJ_VALUE_1 - ADJ_VALUE_2) / ABS(ADJ_VALUE_1)
 
   WRITE(IOUT, '(A,1E30.15)') ' = ', ADJ_VALUE_1
@@ -232,39 +245,6 @@ PROGRAM TEST_DIRTRANS_ADJOINT
 
 CONTAINS
 
-FUNCTION SCALPRODGP(RGP1, RGP2) RESULT(RSC)
-
-  ! Scalar product in spectral space
-  REAL(KIND=JPRB) :: RGP1(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS), RGP2(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS)
-  REAL(KIND=JPRB) :: RSC
-
-  INTEGER(KIND=JPIM) :: JLEV, JKGLO, IEND, IBL, JROF
-  REAL(KIND=JPRB) :: RGP(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS), RGPG(IGPTOTG,3*JP_NUMLEVELS_G)
-
-  RSC = 0.0_JPRB
-
-  !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JLEV,JKGLO,IEND,IBL,JROF)
-  DO JLEV = 1, 3 * JP_NUMLEVELS_G
-    DO JKGLO = 1, IGPTOT, JPPROMA
-      IEND = MIN(JPPROMA, IGPTOT - JKGLO + 1)
-      IBL = (JKGLO - 1) / JPPROMA+1
-      DO JROF = 1, IEND
-        RGP(JROF,JLEV,IBL) = RGP1(JROF,JLEV,IBL) * RGP2(JROF,JLEV,IBL)
-      ENDDO
-    ENDDO
-  ENDDO
-  !$OMP END PARALLEL DO
-
-  CALL GATH_GRID(RGPG, JPPROMA, 3*JP_NUMLEVELS_G, ITOGP, PGP=RGP)
-
-  IF(IMYPROC == 1) THEN
-    RSC = SUM(RGPG)
-  ELSE
-    RSC = 0.0_JPRB
-  ENDIF
-
-END FUNCTION SCALPRODGP
-
 FUNCTION SCALPRODSP(PSP1,PSP2) RESULT(PSC)
 
 ! Scalar product in spectral space
@@ -340,4 +320,4 @@ FUNCTION DETECT_MPIRUN() RESULT(LMPI_REQUIRED)
   ENDIF
 END FUNCTION
 
-END PROGRAM TEST_DIRTRANS_ADJOINT
\ No newline at end of file
+END PROGRAM TEST_DIRTRANS_ADJOINT
diff --git a/tests/trans/test_invtrans_adjoint.F90 b/tests/trans/test_invtrans_adjoint.F90
index d173c5ae8..f0b79a8ad 100644
--- a/tests/trans/test_invtrans_adjoint.F90
+++ b/tests/trans/test_invtrans_adjoint.F90
@@ -162,6 +162,11 @@ PROGRAM TEST_INVTRANS_ADJOINT
 CALL DIST_SPEC(PSPECG=ZVORXG, KFDISTG=JP_NUMLEVELS_G, KFROM=ITOSP, PSPEC=ZVORX, KVSET=IVSET)
 CALL DIST_SPEC(PSPECG=ZDIVXG, KFDISTG=JP_NUMLEVELS_G, KFROM=ITOSP, PSPEC=ZDIVX, KVSET=IVSET)
 
+! We don't need these anymore
+DEALLOCATE(ZSPECXG)
+DEALLOCATE(ZVORXG)
+DEALLOCATE(ZDIVXG)
+
 ! ===== Allocate and initialize gridpoint data =====
 ALLOCATE(ZGXG(IGPTOTG,3*JP_NUMLEVELS_G))
 ALLOCATE(ZGX(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS))
@@ -183,7 +188,8 @@ PROGRAM TEST_INVTRANS_ADJOINT
 
 ! ===== Compute: adj_value2 = =====
 ! i.e. adj_value2 =
-ADJ_VALUE_1 = SCALPRODGP(ZGY, ZGX)
+ZGY = ZGX * ZGY
+CALL GATH_GRID(ZGXG, JPPROMA, 3*JP_NUMLEVELS_G, ITOGP, PGP=ZGY)
 
 ! ===== Compute adjoint invtrans and gather result on proc 1 =====
 ! i.e. invtrans_adj(rgpx) = (rspscalary, rspvory, rspdivy)
@@ -211,6 +217,8 @@ PROGRAM TEST_INVTRANS_ADJOINT
   ! ===== Compare inner products =====
   ! i.e. ==
 
+  ADJ_VALUE_1 = SUM(ZGXG)
+
   ZRELATIVE_ERROR = ABS(ADJ_VALUE_1 - ADJ_VALUE_2) / ABS(ADJ_VALUE_1)
 
   WRITE(IOUT, '(A,1E30.15)') ' = ', ADJ_VALUE_1
@@ -240,39 +248,6 @@ PROGRAM TEST_INVTRANS_ADJOINT
 
 CONTAINS
 
-FUNCTION SCALPRODGP(RGP1, RGP2) RESULT(RSC)
-
-  ! Scalar product in spectral space
-  REAL(KIND=JPRB) :: RGP1(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS), RGP2(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS)
-  REAL(KIND=JPRB) :: RSC
-
-  INTEGER(KIND=JPIM) :: JLEV, JKGLO, IEND, IBL, JROF
-  REAL(KIND=JPRB) :: RGP(JPPROMA,3*JP_NUMLEVELS_G,IGPBLKS), RGPG(IGPTOTG,3*JP_NUMLEVELS_G)
-
-  RSC = 0.0_JPRB
-
-  !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JLEV,JKGLO,IEND,IBL,JROF)
-  DO JLEV = 1, 3 * JP_NUMLEVELS_G
-    DO JKGLO = 1, IGPTOT, JPPROMA
-      IEND = MIN(JPPROMA, IGPTOT - JKGLO + 1)
-      IBL = (JKGLO - 1) / JPPROMA+1
-      DO JROF = 1, IEND
-        RGP(JROF,JLEV,IBL) = RGP1(JROF,JLEV,IBL) * RGP2(JROF,JLEV,IBL)
-      ENDDO
-    ENDDO
-  ENDDO
-  !$OMP END PARALLEL DO
-
-  CALL GATH_GRID(RGPG, JPPROMA, 3*JP_NUMLEVELS_G, ITOGP, PGP=RGP)
-
-  IF(IMYPROC == 1) THEN
-    RSC = SUM(RGPG)
-  ELSE
-    RSC = 0.0_JPRB
-  ENDIF
-
-END FUNCTION SCALPRODGP
-
 FUNCTION SCALPRODSP(PSP1,PSP2) RESULT(PSC)
 
 ! Scalar product in spectral space