From 1330e5d93dfd9578510b481b8c4ebd4a85e71209 Mon Sep 17 00:00:00 2001 From: AMLattanzi Date: Thu, 6 Nov 2025 19:24:27 -0800 Subject: [PATCH 01/44] Move scripts for PM into own directory and make some examples for GPU and CPU. --- .../build_erf_with_shoc_cuda_Perlmutter.sh | 0 .../Perlmutter/cmake_with_cuda_perlmutter.sh | 33 +++++++++++++ .../Perlmutter/cmake_with_cuda_perlmutter.sh~ | 49 +++++++++++++++++++ .../cmake_with_cuda_shoc_Perlmutter.sh} | 13 ++++- .../cmake_with_cuda_shoc_netcdf_perlmutter.sh | 36 ++++++++++++++ ...okkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh | 0 .../cmake_with_shoc_netcdf_perlmutter.sh | 36 ++++++++++++++ Build/cmake_cuda_perlmutter.sh | 17 ------- 8 files changed, 166 insertions(+), 18 deletions(-) rename Build/{ => Perlmutter}/build_erf_with_shoc_cuda_Perlmutter.sh (100%) create mode 100644 Build/Perlmutter/cmake_with_cuda_perlmutter.sh create mode 100644 Build/Perlmutter/cmake_with_cuda_perlmutter.sh~ rename Build/{cmake_with_shoc_cuda_Perlmutter.sh => Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh} (53%) create mode 100644 Build/Perlmutter/cmake_with_cuda_shoc_netcdf_perlmutter.sh rename Build/{ => Perlmutter}/cmake_with_kokkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh (100%) create mode 100644 Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh delete mode 100644 Build/cmake_cuda_perlmutter.sh diff --git a/Build/build_erf_with_shoc_cuda_Perlmutter.sh b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh similarity index 100% rename from Build/build_erf_with_shoc_cuda_Perlmutter.sh rename to Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh diff --git a/Build/Perlmutter/cmake_with_cuda_perlmutter.sh b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh new file mode 100644 index 0000000000..3aef2a62fc --- /dev/null +++ b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Load the needed modules +module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel + +# 
GPU-aware mpi is on by default (set it anyways) +export MPICH_GPU_SUPPORT_ENABLED=1 + +# Deduce the lib paths and files with $(CC/cc/ftn --cray-print-opts=libs) +CRAY_LIBS_CLEAN=$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g') +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(cc --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(ftn --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" + +# Configure and build +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ + -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123 -lmpi_gtl_cuda" \ + -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123 -lmpi_gtl_cuda" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--no-as-needed $CRAY_LIBS_CLEAN" \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + .. 
&& make -j8 diff --git a/Build/Perlmutter/cmake_with_cuda_perlmutter.sh~ b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh~ new file mode 100644 index 0000000000..747a399124 --- /dev/null +++ b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh~ @@ -0,0 +1,49 @@ +#!/bin/bash + +# Load the needed modules +module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel + +# GPU-aware mpi is on by default (set it anyways) +export MPICH_GPU_SUPPORT_ENABLED=1 + +# Deduce the lib paths and files with $(CC/cc/ftn --cray-print-opts=libs) +CRAY_LIBS_CLEAN=$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g') +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(cc --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(ftn --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" + +# Configure and build +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ + -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123" \ + -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--no-as-needed $CRAY_LIBS_CLEAN" \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + .. 
&& make -j8 + + + +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_PREFIX_PATH:PATH=${CUDA_HOME}/../../ \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_NVHPC:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + .. && make -j8 diff --git a/Build/cmake_with_shoc_cuda_Perlmutter.sh b/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh similarity index 53% rename from Build/cmake_with_shoc_cuda_Perlmutter.sh rename to Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh index 82d06ffb9f..d467f60005 100644 --- a/Build/cmake_with_shoc_cuda_Perlmutter.sh +++ b/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh @@ -1,9 +1,20 @@ #!/bin/bash +module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel + +# NOTE: $(CC --cray-print-opts=libs) can be used to deduce libmpi_gnu_123.so + cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_PREFIX_PATH:PATH=${CUDATOOLKIT_HOME}/../../ \ -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ -DCMAKE_BUILD_TYPE:STRING=Release \ - -DCMAKE_PREFIX_PATH:PATH=${CUDATOOLKIT_HOME}/../../ \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ + -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123" \ + -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123" \ -DERF_DIM:STRING=3 \ -DERF_ENABLE_MPI:BOOL=ON \ -DERF_ENABLE_TESTS:BOOL=ON \ diff --git a/Build/Perlmutter/cmake_with_cuda_shoc_netcdf_perlmutter.sh b/Build/Perlmutter/cmake_with_cuda_shoc_netcdf_perlmutter.sh new file mode 100644 index 0000000000..8f16e72384 --- /dev/null +++ b/Build/Perlmutter/cmake_with_cuda_shoc_netcdf_perlmutter.sh @@ -0,0 +1,36 @@ +#!/bin/bash + 
+# Load the needed modules +module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel + +# GPU-aware mpi is on by default (set it anyways) +export MPICH_GPU_SUPPORT_ENABLED=1 + +# Deduce the lib paths and files with $(CC/cc/ftn --cray-print-opts=libs) +CRAY_LIBS_CLEAN=$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g') +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(cc --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(ftn --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" + +# Configure and build +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ + -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123 -lmpi_gtl_cuda" \ + -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123 -lmpi_gtl_cuda" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--no-as-needed $CRAY_LIBS_CLEAN" \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + -DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + .. 
&& make -j8 diff --git a/Build/cmake_with_kokkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh b/Build/Perlmutter/cmake_with_kokkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh similarity index 100% rename from Build/cmake_with_kokkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh rename to Build/Perlmutter/cmake_with_kokkos_ekat_hdf5_netcdf_fftw3_perlmutter.sh diff --git a/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh b/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh new file mode 100644 index 0000000000..7e04de356f --- /dev/null +++ b/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Load the needed modules +module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel + +# Deactive GPU aware MPI for CPU build +export MPICH_GPU_SUPPORT_ENABLED=0 +export CRAY_ACCEL_TARGET=none + +# Deduce the lib paths and files with $(CC/cc/ftn --cray-print-opts=libs) +CRAY_LIBS_CLEAN=$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g') +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(cc --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(ftn --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" + +# Configure and build +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ + -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123" \ + -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--no-as-needed $CRAY_LIBS_CLEAN" \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + 
-DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + .. && make -j8 diff --git a/Build/cmake_cuda_perlmutter.sh b/Build/cmake_cuda_perlmutter.sh deleted file mode 100644 index adfe9a1257..0000000000 --- a/Build/cmake_cuda_perlmutter.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# Example CMake config script for an OSX laptop with OpenMPI - -cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ - -DCMAKE_PREFIX_PATH:PATH=${CUDA_HOME}/../../ \ - -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ - -DCMAKE_BUILD_TYPE:STRING=Release \ - -DERF_DIM:STRING=3 \ - -DERF_ENABLE_MPI:BOOL=ON \ - -DERF_ENABLE_CUDA:BOOL=ON \ - -DERF_ENABLE_NVHPC:BOOL=ON \ - -DERF_ENABLE_TESTS:BOOL=ON \ - -DERF_ENABLE_FCOMPARE:BOOL=ON \ - -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ - -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ - .. && make -j8 From 2efb8b179361e759707eb8f7ca5690ca2e174eb3 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 05:58:54 -0800 Subject: [PATCH 02/44] Add netcdf-cxx4_parallel and other fallbacks and a more descriptive error message --- Exec/Make.ERF | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/Exec/Make.ERF b/Exec/Make.ERF index 79667a88ca..3ae9b9ee71 100644 --- a/Exec/Make.ERF +++ b/Exec/Make.ERF @@ -208,8 +208,21 @@ ifeq ($(USE_NOAHMP), TRUE) $(error USE_NETCDF must be true for using NOAH-MP interface) else DEFINES += -DERF_USE_NOAHMP - includes += $(shell pkg-config --cflags netcdf-fortran) - LIBRARIES += $(shell pkg-config --libs netcdf-fortran) + + # Try netcdf-fortran => netcdf-fortran_parallel + has_netcdf_fortran := $(shell pkg-config --exists netcdf-fortran 2>/dev/null; echo $$?) 
+ ifeq ($(has_netcdf_fortran),0) + includes += $(shell pkg-config --cflags netcdf-fortran) + LIBRARIES += $(shell pkg-config --libs netcdf-fortran) + else + has_netcdf_fortran_parallel := $(shell pkg-config --exists netcdf-fortran_parallel 2>/dev/null; echo $$?) + ifeq ($(has_netcdf_fortran_parallel),0) + includes += $(shell pkg-config --cflags netcdf-fortran_parallel) + LIBRARIES += $(shell pkg-config --libs netcdf-fortran_parallel) + else + $(error NetCDF Fortran not found. Tried netcdf-fortran and netcdf-fortran_parallel) + endif + endif NOAHMP_HOME ?= $(ERF_HOME)/Submodules/Noah-MP @@ -396,13 +409,26 @@ endif # Turn on NetCDF macro define ifeq ($(USE_NETCDF), TRUE) DEFINES += -DERF_USE_NETCDF - has_netcdf_mpi := $(shell pkg-config --cflags netcdf-mpi > /dev/null 2>&1; echo $$?) - ifeq ($(has_netcdf_mpi),0) - includes += $(shell pkg-config --cflags netcdf-mpi) - LIBRARIES += $(shell pkg-config --libs netcdf-mpi) + + # Try netcdf => netcdf-cxx4_parallel => netcdf_parallel + has_netcdf := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf 2>/dev/null; echo $$?) + ifeq ($(has_netcdf),0) + includes += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --cflags netcdf) + LIBRARIES += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --libs netcdf) else - includes += $(shell pkg-config --cflags netcdf) - LIBRARIES += $(shell pkg-config --libs netcdf) + has_netcdf_cxx4_parallel := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf-cxx4_parallel 2>/dev/null; echo $$?) 
+ ifeq ($(has_netcdf_cxx4_parallel),0) + includes += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --cflags netcdf-cxx4_parallel) + LIBRARIES += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --libs netcdf-cxx4_parallel) + else + has_netcdf_parallel := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf_parallel 2>/dev/null; echo $$?) + ifeq ($(has_netcdf_parallel),0) + includes += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --cflags netcdf_parallel) + LIBRARIES += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --libs netcdf_parallel) + else + $(error NetCDF not found. Tried netcdf, netcdf-cxx4_parallel, and netcdf_parallel) + endif + endif endif endif From f8800d5cf38203c20bfe42f3d304362552955ed2 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 06:18:02 -0800 Subject: [PATCH 03/44] Add similar noahmp netcdf-fortran logic --- Exec/Make.ERF | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Exec/Make.ERF b/Exec/Make.ERF index 3ae9b9ee71..1aaca5c755 100644 --- a/Exec/Make.ERF +++ b/Exec/Make.ERF @@ -208,17 +208,17 @@ ifeq ($(USE_NOAHMP), TRUE) $(error USE_NETCDF must be true for using NOAH-MP interface) else DEFINES += -DERF_USE_NOAHMP - + # Try netcdf-fortran => netcdf-fortran_parallel - has_netcdf_fortran := $(shell pkg-config --exists netcdf-fortran 2>/dev/null; echo $$?) + has_netcdf_fortran := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf-fortran 2>/dev/null; echo $$?) 
ifeq ($(has_netcdf_fortran),0) - includes += $(shell pkg-config --cflags netcdf-fortran) - LIBRARIES += $(shell pkg-config --libs netcdf-fortran) + includes += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --cflags netcdf-fortran) + LIBRARIES += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --libs netcdf-fortran) else - has_netcdf_fortran_parallel := $(shell pkg-config --exists netcdf-fortran_parallel 2>/dev/null; echo $$?) + has_netcdf_fortran_parallel := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf-fortran_parallel 2>/dev/null; echo $$?) ifeq ($(has_netcdf_fortran_parallel),0) - includes += $(shell pkg-config --cflags netcdf-fortran_parallel) - LIBRARIES += $(shell pkg-config --libs netcdf-fortran_parallel) + includes += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --cflags netcdf-fortran_parallel) + LIBRARIES += $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --libs netcdf-fortran_parallel) else $(error NetCDF Fortran not found. Tried netcdf-fortran and netcdf-fortran_parallel) endif @@ -409,7 +409,7 @@ endif # Turn on NetCDF macro define ifeq ($(USE_NETCDF), TRUE) DEFINES += -DERF_USE_NETCDF - + # Try netcdf => netcdf-cxx4_parallel => netcdf_parallel has_netcdf := $(shell if [ -n "$$MPICH_DIR" ]; then export PKG_CONFIG_PATH="$$MPICH_DIR/lib/pkgconfig:$$PKG_CONFIG_PATH"; fi; pkg-config --exists netcdf 2>/dev/null; echo $$?) ifeq ($(has_netcdf),0) From 879c4f28f995474b5aabaedbe8bd127b8c588e21 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Fri, 7 Nov 2025 07:02:05 -0800 Subject: [PATCH 04/44] Minor path fixing and additional validation script --- .../build_erf_with_shoc_cuda_Perlmutter.sh | 6 +- .../Perlmutter/cmake_with_cuda_perlmutter.sh | 1 + .../cmake_with_cuda_shoc_Perlmutter.sh | 6 + Build/setup_cmake_validation.sh | 316 ++++++++++++++++++ 4 files changed, 326 insertions(+), 3 deletions(-) mode change 100644 => 100755 Build/Perlmutter/cmake_with_cuda_perlmutter.sh create mode 100755 Build/setup_cmake_validation.sh diff --git a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh index add85b002e..1bf15c1b18 100644 --- a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh +++ b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh @@ -19,11 +19,11 @@ fi # 3. Prepare build directory echo "Preparing build directory..." mkdir -p "$ERF_DIR/build" -cp "$ERF_DIR/Build/cmake_with_shoc_cuda_Perlmutter.sh" "$ERF_DIR/build/" +cp "$ERF_DIR/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh" "$ERF_DIR/build/" # 4. Move into build directory cd "$ERF_DIR/build" # 5. Run cmake setup -echo "Running cmake_with_shoc_cuda_Perlmutter.sh..." -source cmake_with_shoc_cuda_Perlmutter.sh +echo "Running cmake_with_cuda_shoc_Perlmutter.sh..." 
+source cmake_with_cuda_shoc_Perlmutter.sh diff --git a/Build/Perlmutter/cmake_with_cuda_perlmutter.sh b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh old mode 100644 new mode 100755 index 3aef2a62fc..316dbc2294 --- a/Build/Perlmutter/cmake_with_cuda_perlmutter.sh +++ b/Build/Perlmutter/cmake_with_cuda_perlmutter.sh @@ -26,6 +26,7 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ -DERF_DIM:STRING=3 \ -DERF_ENABLE_MPI:BOOL=ON \ -DERF_ENABLE_TESTS:BOOL=ON \ + -DAMReX_CUDA_ARCH=8.0 \ -DERF_ENABLE_CUDA:BOOL=ON \ -DERF_ENABLE_FCOMPARE:BOOL=ON \ -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ diff --git a/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh b/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh index d467f60005..c681d79617 100644 --- a/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh +++ b/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh @@ -3,6 +3,11 @@ module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel # NOTE: $(CC --cray-print-opts=libs) can be used to deduce libmpi_gnu_123.so +# Depending on your module version, you may want to add all flags to EXE_LINKER_FLAGS without the as-needed flag if you're building with the fcompare tools + +CRAY_LIBS_CLEAN=$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g') +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(cc --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" +CRAY_LIBS_CLEAN="$CRAY_LIBS_CLEAN $(ftn --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')" cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ -DCMAKE_PREFIX_PATH:PATH=${CUDATOOLKIT_HOME}/../../ \ @@ -13,6 +18,7 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install \ -DCMAKE_C_FLAGS="$(cc --cray-print-opts=cflags)" \ -DCMAKE_CXX_FLAGS="$(CC --cray-print-opts=cflags)" \ -DCMAKE_CUDA_FLAGS="$(CC --cray-print-opts=cflags)" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--no-as-needed $CRAY_LIBS_CLEAN" \ 
-DCMAKE_CXX_STANDARD_LIBRARIES="-lmpi_gnu_123" \ -DCMAKE_CUDA_STANDARD_LIBRARIES="-lmpi_gnu_123" \ -DERF_DIM:STRING=3 \ diff --git a/Build/setup_cmake_validation.sh b/Build/setup_cmake_validation.sh new file mode 100755 index 0000000000..2c534e13b7 --- /dev/null +++ b/Build/setup_cmake_validation.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +set -e +set -o pipefail + +# Function to verify if a directory is the ERF repo root +verify_erf_dir() { + local dir=$1 + + # Check for basic structure + if [ ! -f "$dir/CMakeLists.txt" ] || [ ! -d "$dir/Source" ]; then + return 1 + fi + + # Check for "Energy Research and Forecasting" in key files + local found=0 + + if [ -f "$dir/README.rst" ]; then + if grep -q "Energy Research and Forecasting" "$dir/README.rst" 2>/dev/null || true; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/LICENSE.md" ]; then + if grep -q "Energy Research and Forecasting" "$dir/LICENSE.md" 2>/dev/null || true; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/CITATION.cff" ]; then + if grep -q "Energy Research and Forecasting" "$dir/CITATION.cff" 2>/dev/null || true; then + found=1 + fi + fi + + return $((1 - found)) +} + +# Function to find ERF repo root +find_erf_dir() { + # Method 1: Check if we're already in Build/ + if [ -f "../CMakeLists.txt" ] && [ -d "../Source" ]; then + local candidate="$(cd .. 
&& pwd)" + if verify_erf_dir "$candidate"; then + ERF_DIR="$candidate" + echo "Detected ERF_DIR from Build location: $ERF_DIR" + return 0 + fi + fi + + # Method 2: Use git to find repo root + if command -v git &> /dev/null; then + if git rev-parse --is-inside-work-tree &> /dev/null 2>&1; then + local git_root="$(git rev-parse --show-toplevel)" + if verify_erf_dir "$git_root"; then + ERF_DIR="$git_root" + echo "Detected ERF_DIR from git: $ERF_DIR" + return 0 + fi + fi + fi + + # Method 3: Try going up from script location + local script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + # Check if script is in Build/ directory + if [[ "$script_dir" =~ /Build$ ]]; then + local candidate="$(dirname "$script_dir")" + if verify_erf_dir "$candidate"; then + ERF_DIR="$candidate" + echo "Detected ERF_DIR from script location: $ERF_DIR" + return 0 + fi + fi + + # Method 4: Check current directory + if verify_erf_dir "$PWD"; then + ERF_DIR="$PWD" + echo "Detected ERF_DIR from current directory: $ERF_DIR" + return 0 + fi + + return 1 +} + +# Parse arguments +if [ $# -lt 1 ] || [ $# -gt 3 ]; then + echo "Usage: $0 [script_pattern] [erf_dir]" + echo "" + echo "Sets:" + echo " default - Scripts from Build/" + echo " perlmutter - Scripts from Build/Perlmutter/" + echo " gnu_ekat - Scripts from Build/GNU_Ekat/" + echo "" + echo "If script_pattern is provided, creates build_/" + echo "Otherwise creates build_/" + echo "" + echo "If erf_dir is provided, uses that as ERF_DIR" + echo "Otherwise auto-detects ERF repo root" + exit 1 +fi + +SET=$1 +PATTERN=${2:-} +ERF_DIR_ARG=${3:-} + +# Set ERF_DIR +if [ -n "$ERF_DIR_ARG" ]; then + ERF_DIR="$ERF_DIR_ARG" + echo "Using provided ERF_DIR: $ERF_DIR" + if ! verify_erf_dir "$ERF_DIR"; then + echo "Error: Provided directory is not a valid ERF repository" + echo "Must contain 'Energy Research and Forecasting' in README.rst, LICENSE.md, or CITATION.cff" + exit 1 + fi +else + if ! 
find_erf_dir; then + echo "Error: Could not auto-detect ERF_DIR" + echo "Please provide it as the third argument or run from ERF Build/ directory" + echo "" + echo "Verification checks for:" + echo " - CMakeLists.txt and Source/ directory" + echo " - 'Energy Research and Forecasting' in README.rst, LICENSE.md, or CITATION.cff" + exit 1 + fi +fi + +echo "ERF_DIR set to: $ERF_DIR" + +# Define source directories relative to ERF_DIR +DEFAULT_DIR="$ERF_DIR/Build" +PERLMUTTER_DIR="$ERF_DIR/Build/Perlmutter" +GNU_EKAT_DIR="$ERF_DIR/Build/GNU_Ekat" + +case $SET in + default) + SRC_DIR="$DEFAULT_DIR" + ;; + perlmutter) + SRC_DIR="$PERLMUTTER_DIR" + ;; + gnu_ekat) + SRC_DIR="$GNU_EKAT_DIR" + ;; + *) + echo "Error: Invalid set '$SET'" + echo "Choose: default, perlmutter, or gnu_ekat" + exit 1 + ;; +esac + +if [ ! -d "$SRC_DIR" ]; then + echo "Error: Source directory does not exist: $SRC_DIR" + exit 1 +fi + +# Determine build directory name +if [ -n "$PATTERN" ]; then + BUILD_DIR="$ERF_DIR/build_${PATTERN}" +else + BUILD_DIR="$ERF_DIR/build_${SET}" +fi + +# Create build directory +mkdir -p "$BUILD_DIR" +echo "Created directory: $BUILD_DIR" + +# Find and copy ERF cmake build scripts +echo "Scanning for ERF cmake scripts in $SRC_DIR:" +COPIED=0 +SKIPPED=0 + +# Temporarily disable exit on error for the loop +set +e + +for script in "$SRC_DIR"/*.sh; do + # Check if file exists (glob might not match anything) + if [ ! 
-f "$script" ]; then + continue + fi + + basename_script=$(basename "$script") + + # Skip backup files + if [[ "$basename_script" =~ ~$ ]]; then + SKIPPED=$((SKIPPED + 1)) + continue + fi + + # Check if it's an ERF cmake script (contains DERF or cmake) + has_derf=0 + has_cmake=0 + + grep -q "DERF" "$script" 2>/dev/null && has_derf=1 + grep -q "cmake" "$script" 2>/dev/null && has_cmake=1 + + if [ $has_derf -eq 1 ] || [ $has_cmake -eq 1 ]; then + cp "$script" "$BUILD_DIR/" + chmod +x "$BUILD_DIR/$basename_script" + echo " ✓ $basename_script" + COPIED=$((COPIED + 1)) + else + echo " ✗ $basename_script (no DERF or cmake found)" + SKIPPED=$((SKIPPED + 1)) + fi +done + +# Re-enable exit on error +set -e + +echo "" +echo "Summary: Copied $COPIED script(s), skipped $SKIPPED" + +if [ $COPIED -eq 0 ]; then + echo "Warning: No ERF cmake scripts found" + echo "Scripts should contain 'DERF' or 'cmake'" +fi + +# Create a run script in the build directory +cat > "$BUILD_DIR/run.sh" << 'EOF' +#!/bin/bash + +set -e +set -o pipefail + +# Resolve ERF_DIR (go up from build directory) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export ERF_DIR="$(dirname "$SCRIPT_DIR")" + +echo "ERF_DIR set to: $ERF_DIR" + +# Find all .sh scripts (excluding run.sh and backups) +SCRIPTS=() +for script in *.sh; do + if [ "$script" = "run.sh" ]; then + continue + fi + if [[ "$script" =~ ~$ ]]; then + continue + fi + if [ -f "$script" ]; then + SCRIPTS+=("$script") + fi +done + +# Sort scripts alphabetically +IFS=$'\n' SCRIPTS=($(sort <<<"${SCRIPTS[*]}")) +unset IFS + +if [ ${#SCRIPTS[@]} -eq 0 ]; then + echo "Error: No build scripts found in this directory" + exit 1 +fi + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "" + echo "Available ERF cmake scripts:" + for i in "${!SCRIPTS[@]}"; do + script_base="${SCRIPTS[$i]%.sh}" + printf "%3d: %s\n" $((i+1)) "${SCRIPTS[$i]}" + printf " → subdirectory: %s/script_%s/\n" "$ERF_DIR" "$script_base" + done + echo "" + echo "Each script will 
run in its own clean subdirectory at ERF root." + exit 1 +fi + +NUM=$1 +if [ $NUM -lt 1 ] || [ $NUM -gt ${#SCRIPTS[@]} ]; then + echo "Error: Number must be between 1 and ${#SCRIPTS[@]}" + exit 1 +fi + +SCRIPT="${SCRIPTS[$((NUM-1))]}" +SCRIPT_BASE="${SCRIPT%.sh}" +SUBDIR="$ERF_DIR/script_${SCRIPT_BASE}" + +# Create a clean subdirectory for this script +if [ -d "$SUBDIR" ]; then + echo "Warning: $SUBDIR already exists" + read -p "Delete and recreate? (y/N) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + rm -rf "$SUBDIR" + else + echo "Aborting. Please remove $SUBDIR manually or choose a different script." + exit 1 + fi +fi + +mkdir -p "$SUBDIR" +echo "========================================" +echo "Running: $SCRIPT" +echo "Build directory: $SUBDIR" +echo "Working directory: $SUBDIR" +echo "========================================" +echo "" + +# Copy the script into the subdirectory and run it there +cp "$SCRIPT" "$SUBDIR/" +cd "$SUBDIR" +bash "./$SCRIPT" +EOF + +chmod +x "$BUILD_DIR/run.sh" + +echo "" +echo "Setup complete!" +echo "Build directory: $BUILD_DIR" +echo "" +echo "To use:" +echo " cd $BUILD_DIR" +echo " ./run.sh # List available scripts" +echo " ./run.sh # Run a specific script" +echo "" +echo "Copied $COPIED script(s)" From d19acb7962a54792a8fba51dd7aceee57082fe7c Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Fri, 7 Nov 2025 11:29:08 -0800 Subject: [PATCH 05/44] Automatically make ./wrapper_clean_build.sh ./cmake_cuda.sh work on perlmutter --- Build/wrapper_clean_build.sh | 193 ++++++++++ Build/wrapper_clean_build_auto.sh | 55 +++ CMake/CrayDetection.cmake | 618 ++++++++++++++++++++++++++++++ CMakeLists.txt | 3 + 4 files changed, 869 insertions(+) create mode 100755 Build/wrapper_clean_build.sh create mode 100755 Build/wrapper_clean_build_auto.sh create mode 100644 CMake/CrayDetection.cmake diff --git a/Build/wrapper_clean_build.sh b/Build/wrapper_clean_build.sh new file mode 100755 index 0000000000..b647e76dc5 --- /dev/null +++ b/Build/wrapper_clean_build.sh @@ -0,0 +1,193 @@ +#!/bin/bash +set -e + +# ============================================================================ +# CMake Build Wrapper with Cleanup (distclean equivalent) +# ============================================================================ +# +# MODERN CMAKE PRACTICE: +# ---------------------- +# The current best practice for CMake is to use out-of-source builds with: +# cmake -S -B +# +# For example: +# cmake -S .. -B build_release +# cmake --build build_release +# cmake --install build_release --prefix install_release +# +# This keeps your source tree clean and allows multiple build configurations +# (debug, release, different compilers, etc.) in separate directories. +# +# MODERN CMAKE INSTALL: +# --------------------- +# The newer cmake --install command (CMake 3.15+) provides a cleaner interface +# than the older "make install" or "cmake --build . --target install": +# +# cmake --install --prefix +# +# Examples: +# # Install to default CMAKE_INSTALL_PREFIX (set during configure) +# cmake --install build_release +# +# # Install to custom location +# cmake --install build_release --prefix /opt/erf +# +# # Install to local directory +# cmake --install build_release --prefix ./install +# +# This is preferred because: +# - Works regardless of build system (make, ninja, etc.) 
+# - Doesn't require entering the build directory +# - More explicit and consistent syntax +# - Allows overriding install location without reconfiguring +# +# Note: You can still set CMAKE_INSTALL_PREFIX during configuration: +# cmake -S .. -B build_release -DCMAKE_INSTALL_PREFIX=/usr/local +# +# ABOUT ERF/Build DIRECTORY: +# -------------------------- +# The ERF/Build directory is primarily intended as a single build directory +# for users doing one configuration. If you're testing multiple configurations +# (CPU, GPU, different flags), you should use separate build directories: +# ERF/build_cpu/ +# ERF/build_gpu/ +# ERF/build_debug/ +# etc. +# +# With corresponding install directories if needed: +# ERF/install_cpu/ +# ERF/install_gpu/ +# ERF/install_debug/ +# +# CLEANUP BEHAVIOR (GNU Make Standard): +# ------------------------------------- +# This script performs a 'distclean' equivalent operation, which per GNU +# standards means: "Delete all files in the current directory (or created +# by this makefile) that are created by configuring or building the program." +# +# For CMake, this includes: +# - CMakeCache.txt (configuration file) +# - CMakeFiles/ (generated build system files) +# - *.cmake (generated configuration scripts) +# - Makefile (if generated) +# - Any other CMake-generated artifacts +# +# This ensures a completely fresh configuration and build, as if you had +# just unpacked the source distribution. +# +# Note: This does NOT delete install directories - those should be managed +# separately (equivalent to 'uninstall' target in GNU make). +# +# ============================================================================ + +SCRIPT=$1 + +if [ -z "$SCRIPT" ]; then + echo "ERROR: No build script provided" + echo "Usage: $0 BUILD_SCRIPT" + exit 1 +fi + +if [ ! 
-f "$SCRIPT" ]; then + echo "ERROR: Build script not found: $SCRIPT" + exit 1 +fi + +# Check what would be deleted (distclean items) +FILES_TO_DELETE="" +[ -f "CMakeCache.txt" ] && FILES_TO_DELETE="$FILES_TO_DELETE CMakeCache.txt" +[ -d "CMakeFiles" ] && FILES_TO_DELETE="$FILES_TO_DELETE CMakeFiles/" +[ -f "Makefile" ] && FILES_TO_DELETE="$FILES_TO_DELETE Makefile" +[ -f "cmake_install.cmake" ] && FILES_TO_DELETE="$FILES_TO_DELETE cmake_install.cmake" +[ -f "CTestTestfile.cmake" ] && FILES_TO_DELETE="$FILES_TO_DELETE CTestTestfile.cmake" + +# Find any other .cmake files (excluding those we might want to keep) +OTHER_CMAKE=$(find . -maxdepth 1 -name "*.cmake" -type f 2>/dev/null | \ + grep -v "cmake_install.cmake\|CTestTestfile.cmake" || true) +if [ -n "$OTHER_CMAKE" ]; then + FILES_TO_DELETE="$FILES_TO_DELETE $OTHER_CMAKE" +fi + +# Additional common CMake artifacts +[ -d "Testing" ] && FILES_TO_DELETE="$FILES_TO_DELETE Testing/" +[ -d "_deps" ] && FILES_TO_DELETE="$FILES_TO_DELETE _deps/" +[ -f "compile_commands.json" ] && FILES_TO_DELETE="$FILES_TO_DELETE compile_commands.json" + +# If there's nothing to clean, just run the script +if [ -z "$FILES_TO_DELETE" ]; then + echo "Directory is already clean, proceeding with build..." + echo "" +else + # Show what will be deleted + echo "==========================================" + echo "WARNING: About to perform 'distclean'" + echo "==========================================" + echo "This will delete all CMake configuration and build artifacts:" + echo "" + for f in $FILES_TO_DELETE; do + if [ -d "$f" ]; then + echo " - $f (directory)" + else + echo " - $f" + fi + done + echo "" + echo "Current directory: $(pwd)" + echo "" + echo "This operation matches the GNU make 'distclean' target:" + echo " \"Delete all files created by configuring or building\"" + echo "" + echo "Note: Install directories (if any) will NOT be deleted." + echo " Use 'cmake --install BUILD_DIR --prefix ...' to manage installations." 
+ echo "" + + # Prompt user + read -p "Delete these files/directories? [y/N] " -n 1 -r + echo + + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Aborted by user. Not deleting anything." + echo "" + echo "To proceed without cleaning, run the build script directly:" + echo " bash $SCRIPT" + echo "" + echo "Modern CMake workflow reminder:" + echo " 1. Configure: cmake -S SOURCE_DIR -B BUILD_DIR" + echo " 2. Build: cmake --build BUILD_DIR" + echo " 3. Install: cmake --install BUILD_DIR --prefix INSTALL_DIR" + exit 1 + fi + + # Actually delete + echo "" + echo "Performing distclean..." + for f in $FILES_TO_DELETE; do + if [ -d "$f" ]; then + rm -rf "$f" && echo " ✓ Deleted directory: $f" + elif [ -f "$f" ]; then + rm -f "$f" && echo " ✓ Deleted file: $f" + fi + done + echo "" + echo "Distclean complete. Ready for fresh configuration." + echo "" +fi + +# Set ERF_DIR to the source tree regtest checked out for us +if [ -z "$ERF_DIR" ]; then + if [ -d "../source" ]; then + export ERF_DIR=$(cd ../source && pwd) + echo "Auto-detected ERF_DIR: $ERF_DIR" + else + echo "WARNING: Could not auto-detect ERF_DIR" + echo "Build script may fail if it requires ERF_DIR" + fi +fi + +# Run the actual build script +echo "==========================================" +echo "Running build script: $SCRIPT" +echo "==========================================" +echo "" + +bash "$SCRIPT" diff --git a/Build/wrapper_clean_build_auto.sh b/Build/wrapper_clean_build_auto.sh new file mode 100755 index 0000000000..a3f757555a --- /dev/null +++ b/Build/wrapper_clean_build_auto.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -e + +# ============================================================================ +# Automated CMake Build Wrapper with Cleanup (distclean equivalent) +# ============================================================================ +# Non-interactive version for CI/regression testing +# See wrapper_clean_build.sh for detailed documentation on: +# - Modern cmake -S -B workflow +# - Modern cmake --install --prefix usage +# 
- GNU distclean behavior +# 
============================================================================ + +SCRIPT=$1 + +if [ -z "$SCRIPT" ]; then + echo "ERROR: No build script provided" + exit 1 +fi + +if [ ! -f "$SCRIPT" ]; then + echo "ERROR: Build script not found: $SCRIPT" + exit 1 +fi + +echo "==========================================" +echo "AUTO MODE: Performing distclean" +echo "==========================================" +echo "Deleting CMake configuration and build artifacts..." +echo "(Install directories NOT affected)" +echo "" + +# Delete all CMake artifacts (distclean equivalent) +rm -rf CMakeCache.txt CMakeFiles/ Makefile cmake_install.cmake \ + CTestTestfile.cmake Testing/ _deps/ compile_commands.json \ + *.cmake 2>/dev/null || true + +echo "✓ Cleaned: CMakeCache.txt, CMakeFiles/, Makefile, *.cmake, etc." +echo "✓ Directory ready for fresh configuration" +echo "" + +# Set ERF_DIR +if [ -z "$ERF_DIR" ]; then + if [ -d "../source" ]; then + export ERF_DIR=$(cd ../source && pwd) + echo "Auto-detected ERF_DIR: $ERF_DIR" + fi +fi + +echo "==========================================" +echo "Running build script: $SCRIPT" +echo "==========================================" +echo "" + +bash "$SCRIPT" diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake new file mode 100644 index 0000000000..5e474ca461 --- /dev/null +++ b/CMake/CrayDetection.cmake @@ -0,0 +1,618 @@ +# ============================================================================== +# Cray System Auto-Detection and Workarounds +# ============================================================================== +# This module detects Cray systems and automatically applies workarounds for +# common build issues. Each fix corresponds to a checklist item. 
+# +# Options: +# -DERF_DISABLE_CRAY_AUTO_FIXES=ON : Disable automatic Cray system fixes +# -DERF_VERBOSE_CRAY_FIXES=ON : Show detailed info for each fix +# ============================================================================== + +option(ERF_DISABLE_CRAY_AUTO_FIXES "Disable automatic Cray system fixes" OFF) +option(ERF_VERBOSE_CRAY_FIXES "Show verbose output for Cray fixes" OFF) + +# Helper macro for verbose messages +macro(erf_cray_verbose) + if(ERF_VERBOSE_CRAY_FIXES) + message(STATUS " [VERBOSE] ${ARGN}") + endif() +endmacro() + +if(ERF_DISABLE_CRAY_AUTO_FIXES) + message(STATUS "ERF: Cray auto-fixes disabled by user") + return() +endif() + +# ============================================================================== +# Detect Cray Environment +# ============================================================================== + +set(ERF_ON_CRAY FALSE) + +erf_cray_verbose("Checking for Cray environment...") + +# Check for Cray compiler wrappers +if(CMAKE_C_COMPILER MATCHES ".*cc$" AND + CMAKE_CXX_COMPILER MATCHES ".*CC$" AND + DEFINED ENV{CRAY_MPICH_DIR}) + set(ERF_ON_CRAY TRUE) + message(STATUS "ERF: Detected Cray system") + message(STATUS " CMAKE_C_COMPILER = ${CMAKE_C_COMPILER}") + message(STATUS " CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") + message(STATUS " CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") + erf_cray_verbose("Detection method: Cray compiler wrappers (cc, CC) + CRAY_MPICH_DIR") +endif() + +# Additional check for Cray environment variables +if(DEFINED ENV{CRAYPE_VERSION}) + set(ERF_ON_CRAY TRUE) + message(STATUS "ERF: Detected Cray Programming Environment") + message(STATUS " CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") + erf_cray_verbose("Detection method: CRAYPE_VERSION environment variable") +endif() + +if(NOT ERF_ON_CRAY) + message(STATUS "ERF: Not on a Cray system, skipping Cray-specific fixes") + erf_cray_verbose("CMAKE_C_COMPILER = ${CMAKE_C_COMPILER}") + erf_cray_verbose("CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") + 
erf_cray_verbose("CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") + erf_cray_verbose("CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") + return() +endif() + +# ============================================================================== +# Prerequisite Checks +# ============================================================================== + +message(STATUS "ERF: Checking Cray prerequisites...") + +# ----------------------------------------------------------------------------- +# CMake Version Check +# ----------------------------------------------------------------------------- +# Cray systems work best with CMake 3.24.5+ +# Earlier versions may have issues with Cray wrappers and CUDA + +set(ERF_RECOMMENDED_CMAKE_VERSION "3.24.5") + +if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) + message(WARNING "") + message(WARNING "ERF: CMake version ${CMAKE_VERSION} detected") + message(WARNING " Recommended minimum for Cray systems: ${ERF_RECOMMENDED_CMAKE_VERSION}") + message(WARNING " You may experience issues with Cray compiler wrappers and CUDA") + message(WARNING "") + message(WARNING " To fix:") + message(WARNING " module load cmake/3.30.2 # or later") + message(WARNING "") + + erf_cray_verbose("Current CMake: ${CMAKE_VERSION}") + erf_cray_verbose("Recommended: ${ERF_RECOMMENDED_CMAKE_VERSION}+") + erf_cray_verbose("Known issues with older CMake on Cray:") + erf_cray_verbose(" - CUDA language detection failures") + erf_cray_verbose(" - Incorrect compiler wrapper handling") + erf_cray_verbose(" - Missing Cray-specific find modules") +else() + message(STATUS " CMake version ${CMAKE_VERSION} >= ${ERF_RECOMMENDED_CMAKE_VERSION} ✓") + erf_cray_verbose("CMake version check passed") +endif() + +# ----------------------------------------------------------------------------- +# CUDA Toolkit Check +# ----------------------------------------------------------------------------- +# When building with CUDA, the cudatoolkit module should be loaded +# This sets CUDA_HOME and other 
necessary environment variables + +if(ERF_ENABLE_CUDA) + message(STATUS " Checking for CUDA toolkit...") + + set(CUDA_TOOLKIT_LOADED FALSE) + + # Check for CUDA_HOME (set by cudatoolkit module) + if(DEFINED ENV{CUDA_HOME}) + message(STATUS " CUDA_HOME = $ENV{CUDA_HOME} ✓") + set(CUDA_TOOLKIT_LOADED TRUE) + erf_cray_verbose("CUDA toolkit appears to be loaded (CUDA_HOME set)") + endif() + + # Additional check for CUDATOOLKIT_HOME (alternative Cray variable) + if(DEFINED ENV{CUDATOOLKIT_HOME}) + message(STATUS " CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME} ✓") + set(CUDA_TOOLKIT_LOADED TRUE) + erf_cray_verbose("CUDA toolkit appears to be loaded (CUDATOOLKIT_HOME set)") + endif() + + # Check for nvcc in PATH + find_program(NVCC_EXECUTABLE nvcc) + if(NVCC_EXECUTABLE) + message(STATUS " Found nvcc: ${NVCC_EXECUTABLE} ✓") + set(CUDA_TOOLKIT_LOADED TRUE) + erf_cray_verbose("nvcc found in PATH") + endif() + + # Warn if CUDA toolkit doesn't appear to be loaded + if(NOT CUDA_TOOLKIT_LOADED) + message(WARNING "") + message(WARNING "ERF: CUDA enabled but CUDA toolkit not detected") + message(WARNING " Expected environment variables not found:") + message(WARNING " - CUDA_HOME") + message(WARNING " - CUDATOOLKIT_HOME") + message(WARNING " - nvcc in PATH") + message(WARNING "") + message(WARNING " To fix:") + message(WARNING " module load cudatoolkit") + message(WARNING " Or on newer systems:") + message(WARNING " module load cuda") + message(WARNING "") + message(WARNING " Build may fail with CUDA-related errors") + message(WARNING "") + + erf_cray_verbose("CUDA_HOME = $ENV{CUDA_HOME}") + erf_cray_verbose("CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") + erf_cray_verbose("nvcc search result: ${NVCC_EXECUTABLE}") + endif() + + # Check CUDA architecture is set for GPU builds + if(NOT AMReX_CUDA_ARCH AND NOT DEFINED ENV{AMREX_CUDA_ARCH}) + message(WARNING "") + message(WARNING "ERF: CUDA enabled but GPU architecture not specified") + message(WARNING " Set AMReX_CUDA_ARCH for optimal 
performance") + message(WARNING " For Perlmutter A100 GPUs:") + message(WARNING " -DAMReX_CUDA_ARCH=8.0") + message(WARNING " Or set in environment:") + message(WARNING " export AMREX_CUDA_ARCH=8.0") + message(WARNING "") + + erf_cray_verbose("AMReX_CUDA_ARCH not set (will use CMake default)") + else() + if(AMReX_CUDA_ARCH) + message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH} ✓") + else() + message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH} ✓") + endif() + endif() +else() + erf_cray_verbose("CUDA not enabled, skipping CUDA toolkit checks") +endif() + +# ----------------------------------------------------------------------------- +# NetCDF Module Check +# ----------------------------------------------------------------------------- + +if(ERF_ENABLE_NETCDF) + message(STATUS " Checking for NetCDF...") + + set(NETCDF_LOADED FALSE) + + if(DEFINED ENV{NETCDF_DIR}) + message(STATUS " NETCDF_DIR = $ENV{NETCDF_DIR} ✓") + set(NETCDF_LOADED TRUE) + endif() + + if(NOT NETCDF_LOADED) + message(WARNING "") + message(WARNING "ERF: NetCDF enabled but NETCDF_DIR not set") + message(WARNING " To fix:") + message(WARNING " module load cray-netcdf-hdf5parallel") + message(WARNING " Or:") + message(WARNING " module load cray-netcdf") + message(WARNING "") + + erf_cray_verbose("NETCDF_DIR not found in environment") + endif() +else() + erf_cray_verbose("NetCDF not enabled, skipping NetCDF checks") +endif() + +# ----------------------------------------------------------------------------- +# Module Environment Summary +# ----------------------------------------------------------------------------- + +if(ERF_VERBOSE_CRAY_FIXES) + message(STATUS "") + message(STATUS "[VERBOSE] Key environment variables:") + message(STATUS "[VERBOSE] CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") + message(STATUS "[VERBOSE] CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") + message(STATUS "[VERBOSE] MPICH_DIR = $ENV{MPICH_DIR}") + message(STATUS "[VERBOSE] CUDA_HOME = $ENV{CUDA_HOME}") + message(STATUS "[VERBOSE] 
CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") + message(STATUS "[VERBOSE] NETCDF_DIR = $ENV{NETCDF_DIR}") + message(STATUS "[VERBOSE] HDF5_DIR = $ENV{HDF5_DIR}") + message(STATUS "[VERBOSE] MPICH_GPU_SUPPORT = $ENV{MPICH_GPU_SUPPORT_ENABLED}") + message(STATUS "") +endif() + +message(STATUS "") + +# ============================================================================== +# Fix 1: CUDA + EKAT -> nvcc_wrapper complications (Checklist Item 1) +# ============================================================================== +# PROBLEM: When building with EKAT, we get nvcc_wrapper which can cause +# "mpi.h not found" errors because nvcc_wrapper doesn't know about +# Cray's include paths +# SOLUTION: Add Cray compiler flags to CUDA compilation via --cray-print-opts + +if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3)) + message(STATUS "ERF: [Fix 1] Applying CUDA+EKAT nvcc_wrapper fix") + + erf_cray_verbose("Problem: EKAT uses nvcc_wrapper which doesn't inherit Cray paths") + erf_cray_verbose("Condition: ERF_ENABLE_CUDA=ON and (RRTMGP or SHOC or P3 enabled)") + erf_cray_verbose("Solution: Add Cray-specific flags from 'CC --cray-print-opts=cflags'") + + # Get Cray-specific flags + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags + OUTPUT_VARIABLE CRAY_CUDA_FLAGS + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + RESULT_VARIABLE CRAY_CUDA_FLAGS_RESULT + ) + + if(CRAY_CUDA_FLAGS_RESULT EQUAL 0 AND CRAY_CUDA_FLAGS) + message(STATUS " Adding Cray flags to CUDA compilation") + erf_cray_verbose("Retrieved flags: ${CRAY_CUDA_FLAGS}") + erf_cray_verbose("Command used: ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags") + + if(CMAKE_CUDA_FLAGS) + erf_cray_verbose("Appending to existing CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CRAY_CUDA_FLAGS}" CACHE STRING "" FORCE) + else() + erf_cray_verbose("Setting new CMAKE_CUDA_FLAGS") + set(CMAKE_CUDA_FLAGS "${CRAY_CUDA_FLAGS}" CACHE STRING 
"" FORCE) + endif() + + erf_cray_verbose("Final CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") + else() + message(WARNING "ERF: Could not retrieve Cray CUDA flags") + message(WARNING " Command attempted: ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags") + message(WARNING " Return code: ${CRAY_CUDA_FLAGS_RESULT}") + message(WARNING " You may need to set CMAKE_CUDA_FLAGS manually") + message(WARNING " Example: -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") + endif() +else() + erf_cray_verbose("Fix 1 not needed (CUDA+EKAT not both enabled)") +endif() + +# ============================================================================== +# Fix 2: FCOMPARE + Cray -> mpi_gnu_123 not found (Checklist Item 2) +# ============================================================================== +# PROBLEM: When building with fcompare, Cray's --as-needed linker flag causes +# the linker to drop MPI libraries it thinks aren't needed, leading to +# "cannot find -lmpi_gnu_123" errors +# SOLUTION: Remove --as-needed from Cray library flags and add --no-as-needed + +if(ERF_ENABLE_FCOMPARE) + message(STATUS "ERF: [Fix 2] Applying fcompare linker fix") + + erf_cray_verbose("Problem: Cray uses --as-needed which drops required MPI libs") + erf_cray_verbose("Condition: ERF_ENABLE_FCOMPARE=ON") + erf_cray_verbose("Solution: Clean Cray lib flags and add --no-as-needed") + + # Get Cray library paths and clean them + set(CRAY_LIBS_CLEAN "") + set(COMPILERS_CHECKED "") + + foreach(COMPILER IN ITEMS ${CMAKE_CXX_COMPILER} ${CMAKE_C_COMPILER} ${CMAKE_Fortran_COMPILER}) + if(EXISTS ${COMPILER}) + erf_cray_verbose("Checking compiler: ${COMPILER}") + + execute_process( + COMMAND ${COMPILER} --cray-print-opts=libs + OUTPUT_VARIABLE COMPILER_LIBS + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + RESULT_VARIABLE COMPILER_LIBS_RESULT + ) + + if(COMPILER_LIBS_RESULT EQUAL 0) + erf_cray_verbose(" Original libs: ${COMPILER_LIBS}") + + # Remove problematic --as-needed flags + string(REGEX REPLACE 
"-Wl,--as-needed," "" COMPILER_LIBS "${COMPILER_LIBS}") + string(REGEX REPLACE ",--no-as-needed" "" COMPILER_LIBS "${COMPILER_LIBS}") + string(REGEX REPLACE ",-l" " -l" COMPILER_LIBS "${COMPILER_LIBS}") + + erf_cray_verbose(" Cleaned libs: ${COMPILER_LIBS}") + + set(CRAY_LIBS_CLEAN "${CRAY_LIBS_CLEAN} ${COMPILER_LIBS}") + list(APPEND COMPILERS_CHECKED ${COMPILER}) + else() + erf_cray_verbose(" Failed to get libs from ${COMPILER}") + endif() + endif() + endforeach() + + if(CRAY_LIBS_CLEAN) + message(STATUS " Adding Cray linker flags: -Wl,--no-as-needed + libs") + erf_cray_verbose("Compilers checked: ${COMPILERS_CHECKED}") + erf_cray_verbose("Combined cleaned libs: ${CRAY_LIBS_CLEAN}") + erf_cray_verbose("Final linker flags: -Wl,--no-as-needed ${CRAY_LIBS_CLEAN}") + + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed ${CRAY_LIBS_CLEAN}" + CACHE STRING "" FORCE) + + erf_cray_verbose("CMAKE_EXE_LINKER_FLAGS updated") + else() + message(WARNING "ERF: Could not retrieve Cray library paths") + message(WARNING " Fcompare may fail to link with: cannot find -lmpi_gnu_123") + message(WARNING " Workaround: Set CMAKE_EXE_LINKER_FLAGS manually") + message(WARNING " Example: -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed \$CRAY_LIBS_CLEAN\"") + erf_cray_verbose("No compilers returned valid library flags") + endif() +else() + erf_cray_verbose("Fix 2 not needed (ERF_ENABLE_FCOMPARE=OFF)") +endif() + +# ============================================================================== +# Fix 3: CUDA without cmake module -> math libs not found (Checklist Item 3) +# ============================================================================== +# PROBLEM: If 'module load cmake' isn't run, CMAKE_PREFIX_PATH may not include +# CUDA math libraries path, causing link errors for cuBLAS, cuRAND, etc. 
+# SOLUTION: Add $CUDA_HOME/../../math_libs/lib64 to CMAKE_PREFIX_PATH + +if(ERF_ENABLE_CUDA AND DEFINED ENV{CUDA_HOME}) + set(CUDA_MATH_PATH "$ENV{CUDA_HOME}/../../math_libs/lib64") + + erf_cray_verbose("Checking for CUDA math libraries...") + erf_cray_verbose("CUDA_HOME = $ENV{CUDA_HOME}") + erf_cray_verbose("Expected math libs path: ${CUDA_MATH_PATH}") + + if(EXISTS ${CUDA_MATH_PATH}) + message(STATUS "ERF: [Fix 3] Adding CUDA math libraries path") + message(STATUS " ${CUDA_MATH_PATH}") + + erf_cray_verbose("Problem: CUDA math libs may not be in default search path") + erf_cray_verbose("Condition: ERF_ENABLE_CUDA=ON and CUDA_HOME set") + erf_cray_verbose("Solution: Add CUDA_HOME/../../math_libs/lib64 to CMAKE_PREFIX_PATH") + erf_cray_verbose("Path exists: YES") + + list(APPEND CMAKE_PREFIX_PATH ${CUDA_MATH_PATH}) + set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} CACHE STRING "" FORCE) + + erf_cray_verbose("CMAKE_PREFIX_PATH updated: ${CMAKE_PREFIX_PATH}") + else() + message(WARNING "ERF: CUDA math libs path not found at ${CUDA_MATH_PATH}") + message(WARNING " You may need to 'module load cuda' or set CMAKE_PREFIX_PATH manually") + message(WARNING " Expected libraries: cuBLAS, cuRAND, cuSPARSE, etc.") + erf_cray_verbose("Path exists: NO") + erf_cray_verbose("This may cause link errors for CUDA math libraries") + endif() +else() + if(ERF_ENABLE_CUDA AND NOT DEFINED ENV{CUDA_HOME}) + message(WARNING "ERF: CUDA enabled but CUDA_HOME not set") + message(WARNING " Math libraries may not be found") + message(WARNING " Solution: Load CUDA module or set CUDA_HOME") + erf_cray_verbose("CUDA_HOME not set in environment") + else() + erf_cray_verbose("Fix 3 not needed (ERF_ENABLE_CUDA=OFF)") + endif() +endif() + +# ============================================================================== +# Fix 4: GPU-aware MPI with Cray GTL (Checklist Item 4) +# ============================================================================== +# PROBLEM: GPU-aware MPI on Cray requires linking 
against mpi_gtl_cuda library +# which enables GPU Transfer Library for direct GPU-GPU communication +# SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags + +if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) + message(STATUS "ERF: [Fix 4] Applying GPU-aware MPI fix (Cray GTL)") + + erf_cray_verbose("Problem: GPU-aware MPI needs Cray GTL libraries") + erf_cray_verbose("Condition: CUDA + MPI + MPICH_GPU_SUPPORT_ENABLED=1") + erf_cray_verbose("Solution: Add -lmpi_gnu_123 -lmpi_gtl_cuda to link flags") + erf_cray_verbose("MPICH_GPU_SUPPORT_ENABLED = $ENV{MPICH_GPU_SUPPORT_ENABLED}") + + # Detect MPI library version for correct library name + # TODO: Could auto-detect the actual version instead of hardcoding _gnu_123 + set(CRAY_MPI_LIBS "-lmpi_gnu_123 -lmpi_gtl_cuda") + + erf_cray_verbose("Looking for mpi_gtl_cuda library...") + if(DEFINED ENV{MPICH_DIR}) + erf_cray_verbose("MPICH_DIR = $ENV{MPICH_DIR}") + endif() + + # Check if the libraries exist + find_library(CRAY_MPI_GTL_CUDA mpi_gtl_cuda HINTS ENV MPICH_DIR PATH_SUFFIXES lib) + + if(CRAY_MPI_GTL_CUDA) + message(STATUS " Found Cray GPU-aware MPI library: ${CRAY_MPI_GTL_CUDA}") + erf_cray_verbose("Adding to CMAKE_CUDA_STANDARD_LIBRARIES") + erf_cray_verbose("Adding to CMAKE_CXX_STANDARD_LIBRARIES") + + set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + + erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") + else() + message(WARNING "ERF: GPU-aware MPI requested but GTL libraries not found") + message(WARNING " Set MPICH_GPU_SUPPORT_ENABLED=1 and check MPICH_DIR") + message(WARNING " Manual workaround: -DCMAKE_CUDA_STANDARD_LIBRARIES=\"${CRAY_MPI_LIBS}\"") + message(WARNING " 
-DCMAKE_CXX_STANDARD_LIBRARIES=\"${CRAY_MPI_LIBS}\"") + erf_cray_verbose("Search paths: $ENV{MPICH_DIR}/lib") + erf_cray_verbose("Library not found: mpi_gtl_cuda") + endif() +else() + if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI) + erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED not set") + erf_cray_verbose("Set MPICH_GPU_SUPPORT_ENABLED=1 to enable GPU-aware MPI") + else() + erf_cray_verbose("Fix 4 not needed (CUDA+MPI not both enabled)") + endif() +endif() + +# ============================================================================== +# Fix 5-6: NetCDF with cray-netcdf-hdf5parallel (Checklist Items 5-6) +# ============================================================================== +# PROBLEM 5: Cray NetCDF may use different C++ library names or structures +# PROBLEM 6: pkg-config may not find MPI/NetCDF without correct PKG_CONFIG_PATH +# SOLUTION: Set up pkg-config path and add NetCDF/HDF5 directories to search + +if(ERF_ENABLE_NETCDF AND DEFINED ENV{MPICH_DIR}) + message(STATUS "ERF: [Fix 5-6] Configuring NetCDF with Cray paths") + + erf_cray_verbose("Problem 5: Cray NetCDF may have non-standard library names") + erf_cray_verbose("Problem 6: pkg-config needs MPICH_DIR in PKG_CONFIG_PATH") + erf_cray_verbose("Condition: ERF_ENABLE_NETCDF=ON and MPICH_DIR set") + erf_cray_verbose("Solution: Set PKG_CONFIG_PATH and add search paths") + + # Add MPICH pkg-config path + set(PKG_CONFIG_PATH "$ENV{MPICH_DIR}/lib/pkgconfig") + if(DEFINED ENV{PKG_CONFIG_PATH}) + set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") + erf_cray_verbose("Appending to existing PKG_CONFIG_PATH") + else() + erf_cray_verbose("Creating new PKG_CONFIG_PATH") + endif() + set(ENV{PKG_CONFIG_PATH} ${PKG_CONFIG_PATH}) + + message(STATUS " PKG_CONFIG_PATH = ${PKG_CONFIG_PATH}") + erf_cray_verbose("This allows cmake/gnumake to find MPI and NetCDF via pkg-config") + + # Help find NetCDF (may be named differently on Cray) + if(DEFINED ENV{NETCDF_DIR}) + list(APPEND 
CMAKE_PREFIX_PATH $ENV{NETCDF_DIR}) + message(STATUS " Added NETCDF_DIR to search path: $ENV{NETCDF_DIR}") + erf_cray_verbose("NetCDF headers/libs will be searched in NETCDF_DIR") + else() + erf_cray_verbose("NETCDF_DIR not set (may still work via module)") + endif() + + if(DEFINED ENV{HDF5_DIR}) + list(APPEND CMAKE_PREFIX_PATH $ENV{HDF5_DIR}) + message(STATUS " Added HDF5_DIR to search path: $ENV{HDF5_DIR}") + erf_cray_verbose("HDF5 headers/libs will be searched in HDF5_DIR") + else() + erf_cray_verbose("HDF5_DIR not set (may still work via module)") + endif() + + erf_cray_verbose("CMAKE_PREFIX_PATH now includes: ${CMAKE_PREFIX_PATH}") +else() + if(ERF_ENABLE_NETCDF) + erf_cray_verbose("Fix 5-6 not fully applied: MPICH_DIR not set") + message(WARNING "ERF: NetCDF enabled but MPICH_DIR not set") + message(WARNING " pkg-config may not find MPI libraries") + message(WARNING " Load MPI module or set MPICH_DIR") + else() + erf_cray_verbose("Fix 5-6 not needed (ERF_ENABLE_NETCDF=OFF)") + endif() +endif() + +# ============================================================================== +# Summary +# ============================================================================== + +message(STATUS "") +message(STATUS "ERF: Cray system fixes summary") +message(STATUS "══════════════════════════════════════════════════════════════") + +# Fix 1: CUDA + EKAT +set(FIX1_ACTIVE OFF) +if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3)) + set(FIX1_ACTIVE ON) +endif() +message(STATUS " Fix 1 (CUDA+EKAT): ${FIX1_ACTIVE}") +if(FIX1_ACTIVE AND CRAY_CUDA_FLAGS) + message(STATUS " Applied Cray CUDA flags:") + message(STATUS " ${CRAY_CUDA_FLAGS}") + message(STATUS "") + message(STATUS " Command line equivalent:") + message(STATUS " -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") +endif() + +# Fix 2: fcompare +message(STATUS "") +message(STATUS " Fix 2 (fcompare): ${ERF_ENABLE_FCOMPARE}") +if(ERF_ENABLE_FCOMPARE AND CRAY_LIBS_CLEAN) + message(STATUS " 
Applied Cray library cleanup:") + message(STATUS " ${CRAY_LIBS_CLEAN}") + message(STATUS "") + message(STATUS " Command line equivalent:") + message(STATUS " CRAY_LIBS=\"\$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')\"") + message(STATUS " CRAY_LIBS=\"\$CRAY_LIBS \$(cc --cray-print-opts=libs | sed ...)\"") + message(STATUS " CRAY_LIBS=\"\$CRAY_LIBS \$(ftn --cray-print-opts=libs | sed ...)\"") + message(STATUS " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed \$CRAY_LIBS\"") + message(STATUS "") + message(STATUS " What was actually set:") + message(STATUS " CMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed ${CRAY_LIBS_CLEAN}\"") +endif() + +# Fix 3: CUDA math libs +set(FIX3_ACTIVE OFF) +if(ERF_ENABLE_CUDA AND DEFINED ENV{CUDA_HOME}) + set(CUDA_MATH_PATH_CHECK "$ENV{CUDA_HOME}/../../math_libs/lib64") + if(EXISTS ${CUDA_MATH_PATH_CHECK}) + set(FIX3_ACTIVE ON) + endif() +endif() +message(STATUS "") +message(STATUS " Fix 3 (CUDA math): ${FIX3_ACTIVE}") +if(FIX3_ACTIVE) + message(STATUS " Command line equivalent:") + message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\"") +endif() + +# Fix 4: GPU-aware MPI +set(FIX4_ACTIVE OFF) +if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) + if(CRAY_MPI_GTL_CUDA) + set(FIX4_ACTIVE ON) + endif() +endif() +message(STATUS "") +message(STATUS " Fix 4 (GPU-aware MPI): ${FIX4_ACTIVE}") +if(FIX4_ACTIVE) + message(STATUS " Command line equivalent:") + message(STATUS " export MPICH_GPU_SUPPORT_ENABLED=1") + message(STATUS " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\"") + message(STATUS " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\"") +endif() + +# Fix 5-6: NetCDF +set(FIX56_ACTIVE OFF) +if(ERF_ENABLE_NETCDF AND DEFINED ENV{MPICH_DIR}) + set(FIX56_ACTIVE ON) +endif() +message(STATUS "") +message(STATUS " Fix 5-6 (NetCDF): ${FIX56_ACTIVE}") +if(FIX56_ACTIVE) + message(STATUS " Command line equivalent:") + 
message(STATUS " export PKG_CONFIG_PATH=\"\$MPICH_DIR/lib/pkgconfig:\$PKG_CONFIG_PATH\"") + if(DEFINED ENV{NETCDF_DIR}) + message(STATUS " -DCMAKE_PREFIX_PATH=\"\$NETCDF_DIR\"") + endif() + if(DEFINED ENV{HDF5_DIR}) + message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CMAKE_PREFIX_PATH:\$HDF5_DIR\"") + endif() +endif() + +message(STATUS "") +message(STATUS "══════════════════════════════════════════════════════════════") +message(STATUS " To disable auto-fixes: -DERF_DISABLE_CRAY_AUTO_FIXES=ON") +message(STATUS " To see verbose output: -DERF_VERBOSE_CRAY_FIXES=ON") +message(STATUS " To override any fix: Set the corresponding CMAKE_* variable explicitly") +message(STATUS "") +message(STATUS " Complete manual equivalent (all active fixes):") +message(STATUS " ------------------------------------------------") +if(FIX1_ACTIVE) +message(STATUS " -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\" \\") +endif() +if(ERF_ENABLE_FCOMPARE AND CRAY_LIBS_CLEAN) +message(STATUS " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed ${CRAY_LIBS_CLEAN}\" \\") +endif() +if(FIX3_ACTIVE) +message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\" \\") +endif() +if(FIX4_ACTIVE) +message(STATUS " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\" \\") +message(STATUS " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\" \\") +endif() +message(STATUS "") + +if(ERF_VERBOSE_CRAY_FIXES) + message(STATUS "[VERBOSE] All Cray fixes processing complete") + message(STATUS "[VERBOSE] Review messages above for detailed information") + message(STATUS "[VERBOSE] The command-line equivalents above show what this module does automatically") +endif() \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3331215cfc..cd8ecac4e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,9 @@ endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake") include(CMakePackageConfigHelpers) +# Include Cray/Perlmutter auto-detection and fixes 
+include(CrayDetection) + ########################## OPTIONS ##################################### #General options for all executables in the project From ccd145744d66fffdca44951b7ce7470ed579ee9d Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 11:47:52 -0800 Subject: [PATCH 06/44] Remove module versions, tweak minimum and cuda --- CMake/CrayDetection.cmake | 119 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 4 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 5e474ca461..6a1a3c72a0 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -61,6 +61,117 @@ if(NOT ERF_ON_CRAY) return() endif() +# ============================================================================== +# Compiler Version Checks +# ============================================================================== + +message(STATUS "ERF: Checking compiler versions...") + +# ----------------------------------------------------------------------------- +# GCC Version Check (for std::filesystem support) +# ----------------------------------------------------------------------------- +# ERF uses C++17 which requires GCC 8.0+ +# Older GCC versions will fail with "fatal error: filesystem: No such file" + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + message(STATUS " Detected GNU C++ compiler version: ${CMAKE_CXX_COMPILER_VERSION}") + + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.0") + message(FATAL_ERROR + "\n" + "════════════════════════════════════════════════════════════════\n" + "ERF requires GCC 8.0+ for C++17 support\n" + "Found: GCC ${CMAKE_CXX_COMPILER_VERSION}\n" + "════════════════════════════════════════════════════════════════\n" + "\n" + "On Cray systems, fix by using the Cray wrapper with a modern compiler:\n" + " 1. Load a newer compiler module:\n" + " module load PrgEnv-gnu\n" + " module load gcc\n" + "\n" + " 2. 
Set compiler explicitly:\n" + " -DCMAKE_CXX_COMPILER=\$(which CC)\n" + " Or set environment variable:\n" + " export CXX=\$(which CC)\n" + "\n" + " 3. Verify compiler version:\n" + " CC --version\n" + "\n") + else() + message(STATUS " GCC version ${CMAKE_CXX_COMPILER_VERSION} >= 8.0 ✓") + erf_cray_verbose("GCC version sufficient for C++17 ") + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + message(STATUS " Detected Cray C++ compiler version: ${CMAKE_CXX_COMPILER_VERSION}") + erf_cray_verbose("Cray compiler wrappers detected") + + # Cray wrappers forward to underlying compiler - check what's loaded + if(DEFINED ENV{PE_ENV}) + message(STATUS " Programming Environment: $ENV{PE_ENV}") + erf_cray_verbose("PE_ENV = $ENV{PE_ENV}") + endif() +else() + message(STATUS " Detected C++ compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") +endif() + +# ----------------------------------------------------------------------------- +# GPU Compiler Checks (for CUDA builds) +# ----------------------------------------------------------------------------- +# Kokkos and EKAT read CMAKE_CUDA_COMPILER and CMAKE_CUDA_FLAGS +# We need to ensure these are set correctly for Cray systems + +if(ERF_ENABLE_CUDA) + message(STATUS "") + message(STATUS "ERF: Checking GPU compiler configuration...") + + # Check if CMAKE_CUDA_COMPILER is set + if(CMAKE_CUDA_COMPILER) + message(STATUS " CMAKE_CUDA_COMPILER = ${CMAKE_CUDA_COMPILER}") + erf_cray_verbose("CUDA compiler explicitly set by user or CMake") + else() + message(STATUS " CMAKE_CUDA_COMPILER not set (CMake will auto-detect)") + erf_cray_verbose("CMake will search for nvcc in PATH") + endif() + + # Check if CMAKE_CUDA_FLAGS has been set + if(CMAKE_CUDA_FLAGS) + message(STATUS " CMAKE_CUDA_FLAGS = ${CMAKE_CUDA_FLAGS}") + erf_cray_verbose("CUDA flags explicitly set by user") + else() + message(STATUS " CMAKE_CUDA_FLAGS not set (will be auto-configured)") + erf_cray_verbose("Cray-specific CUDA flags will be added by Fix 1 if 
needed") + endif() + + # Check AMReX_CUDA_ARCH + if(AMReX_CUDA_ARCH) + message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH}") + erf_cray_verbose("GPU architecture explicitly set") + elseif(DEFINED ENV{AMREX_CUDA_ARCH}) + message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH} (from environment)") + erf_cray_verbose("GPU architecture from environment variable") + else() + message(WARNING "") + message(WARNING "ERF: AMReX_CUDA_ARCH not set") + message(WARNING " CMake will use default architecture (may not be optimal)") + message(WARNING " For Perlmutter A100 GPUs, set:") + message(WARNING " -DAMReX_CUDA_ARCH=8.0") + message(WARNING "") + erf_cray_verbose("GPU architecture not set - CMake will use defaults") + endif() + + # Important note about Kokkos/EKAT + if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) + message(STATUS "") + message(STATUS " Note: EKAT/Kokkos will use:") + message(STATUS " - CMAKE_CUDA_COMPILER for nvcc detection") + message(STATUS " - CMAKE_CUDA_FLAGS for compilation flags") + message(STATUS " - Fix 1 will add Cray-specific flags automatically") + erf_cray_verbose("EKAT-based physics enabled - Kokkos will read CMAKE_CUDA_* variables") + endif() + + message(STATUS "") +endif() + # ============================================================================== # Prerequisite Checks # ============================================================================== @@ -70,10 +181,10 @@ message(STATUS "ERF: Checking Cray prerequisites...") # ----------------------------------------------------------------------------- # CMake Version Check # ----------------------------------------------------------------------------- -# Cray systems work best with CMake 3.24.5+ -# Earlier versions may have issues with Cray wrappers and CUDA +# Cray systems work best with CMake 3.24.0+ +# Earlier versions may have issues with Cray wrappers and CUDA when NVHPC is splayed -set(ERF_RECOMMENDED_CMAKE_VERSION "3.24.5") +set(ERF_RECOMMENDED_CMAKE_VERSION "3.24.0") 
if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) message(WARNING "") @@ -82,7 +193,7 @@ if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) message(WARNING " You may experience issues with Cray compiler wrappers and CUDA") message(WARNING "") message(WARNING " To fix:") - message(WARNING " module load cmake/3.30.2 # or later") + message(WARNING " module load cmake") message(WARNING "") erf_cray_verbose("Current CMake: ${CMAKE_VERSION}") From 5b9a5b2e0e2f25eec9bdd54c011add9712850c41 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 11:50:35 -0800 Subject: [PATCH 07/44] Add more cuda kokkos detection --- CMake/CrayDetection.cmake | 149 ++++++++++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 16 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 6a1a3c72a0..157ac2691a 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -142,31 +142,148 @@ if(ERF_ENABLE_CUDA) erf_cray_verbose("Cray-specific CUDA flags will be added by Fix 1 if needed") endif() - # Check AMReX_CUDA_ARCH + # ------------------------------------------------------------------------- + # Detect AMReX CUDA architecture + # Priority: CMake var > AMREX_CUDA_ARCH env > CMAKE_CUDA_ARCH env > CRAY_ACCEL_TARGET + # ------------------------------------------------------------------------- + if(AMReX_CUDA_ARCH) - message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH}") - erf_cray_verbose("GPU architecture explicitly set") + message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH} (user specified)") + erf_cray_verbose("AMReX CUDA arch set via CMake variable") + elseif(DEFINED ENV{AMREX_CUDA_ARCH}) - message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH} (from environment)") - erf_cray_verbose("GPU architecture from environment variable") + set(AMReX_CUDA_ARCH "$ENV{AMREX_CUDA_ARCH}" CACHE STRING "CUDA arch from AMREX_CUDA_ARCH") + message(STATUS " AMReX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH} (from 
AMREX_CUDA_ARCH)") + erf_cray_verbose("AMReX CUDA arch from AMREX_CUDA_ARCH environment variable") + + elseif(DEFINED ENV{CMAKE_CUDA_ARCH}) + # Common in build scripts: CMAKE_CUDA_ARCH="80" + set(ENV_CUDA_ARCH "$ENV{CMAKE_CUDA_ARCH}") + + # Convert to AMReX format (add decimal point if needed) + if(ENV_CUDA_ARCH MATCHES "^[0-9][0-9]$") + # Two-digit format: 70, 80, 90 -> 7.0, 8.0, 9.0 + string(SUBSTRING "${ENV_CUDA_ARCH}" 0 1 MAJOR) + string(SUBSTRING "${ENV_CUDA_ARCH}" 1 1 MINOR) + set(DETECTED_CUDA_ARCH "${MAJOR}.${MINOR}") + else() + # Already in decimal format or other format + set(DETECTED_CUDA_ARCH "${ENV_CUDA_ARCH}") + endif() + + set(AMReX_CUDA_ARCH "${DETECTED_CUDA_ARCH}" CACHE STRING "CUDA arch from CMAKE_CUDA_ARCH") + message(STATUS " AMReX_CUDA_ARCH = ${DETECTED_CUDA_ARCH} (from CMAKE_CUDA_ARCH=${ENV_CUDA_ARCH})") + erf_cray_verbose("Converted CMAKE_CUDA_ARCH=${ENV_CUDA_ARCH} -> AMReX_CUDA_ARCH=${DETECTED_CUDA_ARCH}") + + elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) + # Auto-detect from Cray accelerator module (set by 'module load gpu') + set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + message(STATUS " Detected CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + + if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") + set(AMReX_CUDA_ARCH "7.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_CUDA_ARCH = 7.0 (Tesla V100 from CRAY_ACCEL_TARGET)") + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") + set(AMReX_CUDA_ARCH "8.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_CUDA_ARCH = 8.0 (A100 from CRAY_ACCEL_TARGET)") + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") + set(AMReX_CUDA_ARCH "9.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_CUDA_ARCH = 9.0 (H100 from CRAY_ACCEL_TARGET)") + else() + message(WARNING "ERF: Unknown CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + endif() + erf_cray_verbose("AMReX CUDA arch from CRAY_ACCEL_TARGET module variable") else() message(WARNING "") - message(WARNING 
"ERF: AMReX_CUDA_ARCH not set") - message(WARNING " CMake will use default architecture (may not be optimal)") - message(WARNING " For Perlmutter A100 GPUs, set:") - message(WARNING " -DAMReX_CUDA_ARCH=8.0") + message(WARNING "ERF: AMReX_CUDA_ARCH not detected") + message(WARNING " For Perlmutter: module load gpu") + message(WARNING " Or set: export CMAKE_CUDA_ARCH=80") + message(WARNING " Or set: -DAMReX_CUDA_ARCH=8.0") message(WARNING "") - erf_cray_verbose("GPU architecture not set - CMake will use defaults") endif() - # Important note about Kokkos/EKAT + # ------------------------------------------------------------------------- + # Detect Kokkos architecture (for EKAT builds) + # Priority: CMake var > KOKKOS_GPU_ARCH env > CRAY_ACCEL_TARGET + # ------------------------------------------------------------------------- + if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) message(STATUS "") - message(STATUS " Note: EKAT/Kokkos will use:") - message(STATUS " - CMAKE_CUDA_COMPILER for nvcc detection") - message(STATUS " - CMAKE_CUDA_FLAGS for compilation flags") - message(STATUS " - Fix 1 will add Cray-specific flags automatically") - erf_cray_verbose("EKAT-based physics enabled - Kokkos will read CMAKE_CUDA_* variables") + message(STATUS " EKAT-based physics enabled, checking Kokkos architecture...") + + # Check if user already set Kokkos_ARCH_* via CMake + set(KOKKOS_ARCH_SET FALSE) + if(Kokkos_ARCH_VOLTA70 OR Kokkos_ARCH_AMPERE80 OR Kokkos_ARCH_HOPPER90) + set(KOKKOS_ARCH_SET TRUE) + message(STATUS " Kokkos_ARCH_* already set by user") + erf_cray_verbose("User specified Kokkos architecture via CMake variable") + + elseif(DEFINED ENV{KOKKOS_GPU_ARCH}) + # Detect from KOKKOS_GPU_ARCH environment variable (build scripts) + set(KOKKOS_GPU_ARCH_ENV "$ENV{KOKKOS_GPU_ARCH}") + message(STATUS " Detected KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") + + # Map to Kokkos_ARCH_* CMake variable + if(KOKKOS_GPU_ARCH_ENV STREQUAL "VOLTA70") + set(Kokkos_ARCH_VOLTA70 ON 
CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VOLTA70 -> Kokkos_ARCH_VOLTA70=ON") + + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "AMPERE80") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=AMPERE80 -> Kokkos_ARCH_AMPERE80=ON") + + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "HOPPER90") + set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=HOPPER90 -> Kokkos_ARCH_HOPPER90=ON") + + else() + message(WARNING "ERF: Unknown KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") + message(WARNING " Expected: VOLTA70, AMPERE80, or HOPPER90") + endif() + + elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) + # Fall back to CRAY_ACCEL_TARGET (set by 'module load gpu') + set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + + if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia70 -> Kokkos_ARCH_VOLTA70=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia80 -> Kokkos_ARCH_AMPERE80=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") + set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped 
CRAY_ACCEL_TARGET=nvidia90 -> Kokkos_ARCH_HOPPER90=ON") + endif() + endif() + + if(NOT KOKKOS_ARCH_SET) + message(WARNING "") + message(WARNING "ERF: Kokkos architecture not detected") + message(WARNING " For Perlmutter: module load gpu") + message(WARNING " Or set: export KOKKOS_GPU_ARCH=AMPERE80") + message(WARNING " Or set: -DKokkos_ARCH_AMPERE80=ON") + message(WARNING "") + else() + message(STATUS "") + message(STATUS " Note: After Kokkos configures, CMAKE_CUDA_ARCHITECTURES") + message(STATUS " will be set from Kokkos_CUDA_ARCHITECTURES") + erf_cray_verbose("Kokkos will set CMAKE_CUDA_ARCHITECTURES when CUDA language is enabled") + endif() endif() message(STATUS "") From 13b7aad34c48a69b3ff0da4c238da624766ba37c Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 12:30:48 -0800 Subject: [PATCH 08/44] Haven't tested compiler detection workaround --- CMake/CrayCompilerDetection.cmake | 78 +++++++++++++++++++++++++++++++ CMakeLists.txt | 4 ++ 2 files changed, 82 insertions(+) create mode 100644 CMake/CrayCompilerDetection.cmake diff --git a/CMake/CrayCompilerDetection.cmake b/CMake/CrayCompilerDetection.cmake new file mode 100644 index 0000000000..438a60efbb --- /dev/null +++ b/CMake/CrayCompilerDetection.cmake @@ -0,0 +1,78 @@ +# ============================================================================== +# Cray Compiler Detection (Pre-Project Stage) +# ============================================================================== +# This file runs BEFORE project() to detect Cray systems and set compilers +# The main CrayDetection.cmake runs AFTER project() to apply build fixes +# ============================================================================== + +# Skip if user already set compilers explicitly +if(CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER OR CMAKE_Fortran_COMPILER) + message(STATUS "ERF: Compilers already specified by user, skipping Cray auto-detection") + return() +endif() + +# 
----------------------------------------------------------------------------- +# Detect Cray Environment (using only environment variables) +# ----------------------------------------------------------------------------- + +set(ERF_ON_CRAY_PREPROJECT FALSE) + +# Check for Cray Programming Environment +if(DEFINED ENV{CRAYPE_VERSION}) + set(ERF_ON_CRAY_PREPROJECT TRUE) + message(STATUS "ERF: Detected Cray Programming Environment (CRAYPE_VERSION=$ENV{CRAYPE_VERSION})") +endif() + +# Additional check for Cray MPI +if(DEFINED ENV{CRAY_MPICH_DIR}) + set(ERF_ON_CRAY_PREPROJECT TRUE) + message(STATUS "ERF: Detected Cray MPI (CRAY_MPICH_DIR=$ENV{CRAY_MPICH_DIR})") +endif() + +# Check for Cray compiler module +if(DEFINED ENV{PE_ENV}) + set(ERF_ON_CRAY_PREPROJECT TRUE) + message(STATUS "ERF: Detected Cray Programming Environment: $ENV{PE_ENV}") +endif() + +if(NOT ERF_ON_CRAY_PREPROJECT) + # Not on Cray, skip compiler setup + return() +endif() + +# ----------------------------------------------------------------------------- +# Set Cray Compiler Wrappers as Defaults +# ----------------------------------------------------------------------------- + +message(STATUS "ERF: Setting Cray compiler wrappers...") + +# Find Cray C compiler wrapper +find_program(ERF_CRAY_CC cc) +if(ERF_CRAY_CC) + set(CMAKE_C_COMPILER "${ERF_CRAY_CC}" CACHE FILEPATH "C compiler") + message(STATUS " Set CMAKE_C_COMPILER = ${ERF_CRAY_CC}") +else() + message(WARNING "ERF: On Cray system but 'cc' wrapper not found in PATH") +endif() + +# Find Cray C++ compiler wrapper +find_program(ERF_CRAY_CXX CC) +if(ERF_CRAY_CXX) + set(CMAKE_CXX_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "C++ compiler") + message(STATUS " Set CMAKE_CXX_COMPILER = ${ERF_CRAY_CXX}") +else() + message(WARNING "ERF: On Cray system but 'CC' wrapper not found in PATH") +endif() + +# Find Cray Fortran compiler wrapper (if needed) +if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) + find_program(ERF_CRAY_FC ftn) + if(ERF_CRAY_FC) + 
set(CMAKE_Fortran_COMPILER "${ERF_CRAY_FC}" CACHE FILEPATH "Fortran compiler") + message(STATUS " Set CMAKE_Fortran_COMPILER = ${ERF_CRAY_FC}") + else() + message(WARNING "ERF: On Cray system but 'ftn' wrapper not found in PATH") + endif() +endif() + +message(STATUS "") \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index cd8ecac4e3..c96418ef2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,10 @@ ############################ BASE ###################################### cmake_minimum_required (VERSION 3.14 FATAL_ERROR) + +# Include Cray compiler detection BEFORE project() to set compilers +include(CMake/CrayCompilerDetection) + if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) project(ERF CXX C Fortran) else() From 43655551288de9da0f5c14cc8e32b50af04eeb09 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 15:30:37 -0800 Subject: [PATCH 09/44] Update compiler detection --- CMake/CrayCompilerDetection.cmake | 42 +++++++- CMake/CrayDetection.cmake | 166 ++++++++++++++++++------------ CMakeLists.txt | 3 +- 3 files changed, 142 insertions(+), 69 deletions(-) diff --git a/CMake/CrayCompilerDetection.cmake b/CMake/CrayCompilerDetection.cmake index 438a60efbb..ab4822def6 100644 --- a/CMake/CrayCompilerDetection.cmake +++ b/CMake/CrayCompilerDetection.cmake @@ -75,4 +75,44 @@ if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) endif() endif() -message(STATUS "") \ No newline at end of file +message(STATUS "") + +# ----------------------------------------------------------------------------- +# GPU Host Compilers (for CUDA, HIP, SYCL) +# ----------------------------------------------------------------------------- + +# CUDA Host Compiler - detect via environment +if(DEFINED ENV{CUDA_HOME} OR DEFINED ENV{CUDATOOLKIT_HOME} OR DEFINED ENV{CRAY_ACCEL_TARGET}) + message(STATUS " Detected CUDA environment, configuring CUDA host compiler...") + + # Only set if not already specified by user + if(NOT CMAKE_CUDA_HOST_COMPILER AND NOT 
DEFINED ENV{CUDAHOSTCXX}) + if(ERF_CRAY_CXX) + set(CMAKE_CUDA_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "CUDA host compiler" FORCE) + message(STATUS " Set CMAKE_CUDA_HOST_COMPILER = ${ERF_CRAY_CXX}") + endif() + else() + message(STATUS " CUDA host compiler already set by user") + endif() +endif() + +# HIP Host Compiler - detect via ROCM environment +if(DEFINED ENV{ROCM_PATH} OR DEFINED ENV{HIP_PATH}) + message(STATUS " Detected ROCm/HIP environment, configuring HIP host compiler...") + + # Only set if not already specified by user + if(NOT CMAKE_HIP_HOST_COMPILER AND NOT DEFINED ENV{HIPHOSTCXX}) + if(ERF_CRAY_CXX) + set(CMAKE_HIP_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "HIP host compiler" FORCE) + message(STATUS " Set CMAKE_HIP_HOST_COMPILER = ${ERF_CRAY_CXX}") + endif() + else() + message(STATUS " HIP host compiler already set by user") + endif() +endif() + +# SYCL - detect via Intel oneAPI +if(DEFINED ENV{ONEAPI_ROOT} OR DEFINED ENV{I_MPI_ROOT}) + message(STATUS " Detected Intel oneAPI environment") + message(STATUS " SYCL will use CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") +endif() \ No newline at end of file diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 157ac2691a..bf3641050f 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -78,25 +78,25 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.0") message(FATAL_ERROR - "\n" - "════════════════════════════════════════════════════════════════\n" - "ERF requires GCC 8.0+ for C++17 support\n" - "Found: GCC ${CMAKE_CXX_COMPILER_VERSION}\n" - "════════════════════════════════════════════════════════════════\n" - "\n" - "On Cray systems, fix by using the Cray wrapper with a modern compiler:\n" - " 1. Load a newer compiler module:\n" - " module load PrgEnv-gnu\n" - " module load gcc\n" - "\n" - " 2. 
Set compiler explicitly:\n" - " -DCMAKE_CXX_COMPILER=\$(which CC)\n" - " Or set environment variable:\n" - " export CXX=\$(which CC)\n" - "\n" - " 3. Verify compiler version:\n" - " CC --version\n" - "\n") + "\n" + "════════════════════════════════════════════════════════════════\n" + "ERF requires GCC 8.0+ for C++17 support\n" + "Found: GCC ${CMAKE_CXX_COMPILER_VERSION}\n" + "════════════════════════════════════════════════════════════════\n" + "\n" + "On Cray systems, fix by using the Cray wrapper with a modern compiler:\n" + " 1. Load a newer compiler module:\n" + " module load PrgEnv-gnu\n" + " module load gcc\n" + "\n" + " 2. Set compiler explicitly:\n" + " -DCMAKE_CXX_COMPILER=\$(which CC)\n" + " Or set environment variable:\n" + " export CXX=\$(which CC)\n" + "\n" + " 3. Verify compiler version:\n" + " CC --version\n" + "") else() message(STATUS " GCC version ${CMAKE_CXX_COMPILER_VERSION} >= 8.0 ✓") erf_cray_verbose("GCC version sufficient for C++17 ") @@ -304,14 +304,15 @@ message(STATUS "ERF: Checking Cray prerequisites...") set(ERF_RECOMMENDED_CMAKE_VERSION "3.24.0") if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) - message(WARNING "") - message(WARNING "ERF: CMake version ${CMAKE_VERSION} detected") - message(WARNING " Recommended minimum for Cray systems: ${ERF_RECOMMENDED_CMAKE_VERSION}") - message(WARNING " You may experience issues with Cray compiler wrappers and CUDA") - message(WARNING "") - message(WARNING " To fix:") - message(WARNING " module load cmake") - message(WARNING "") + message(WARNING + "\n" + "ERF: CMake version ${CMAKE_VERSION} detected\n" + " Recommended minimum for Cray systems: ${ERF_RECOMMENDED_CMAKE_VERSION}\n" + " You may experience issues with Cray compiler wrappers and CUDA\n" + "\n" + " To fix:\n" + " module load cmake\n" + "") erf_cray_verbose("Current CMake: ${CMAKE_VERSION}") erf_cray_verbose("Recommended: ${ERF_RECOMMENDED_CMAKE_VERSION}+") @@ -359,20 +360,21 @@ if(ERF_ENABLE_CUDA) # Warn if CUDA toolkit 
doesn't appear to be loaded if(NOT CUDA_TOOLKIT_LOADED) - message(WARNING "") - message(WARNING "ERF: CUDA enabled but CUDA toolkit not detected") - message(WARNING " Expected environment variables not found:") - message(WARNING " - CUDA_HOME") - message(WARNING " - CUDATOOLKIT_HOME") - message(WARNING " - nvcc in PATH") - message(WARNING "") - message(WARNING " To fix:") - message(WARNING " module load cudatoolkit") - message(WARNING " Or on newer systems:") - message(WARNING " module load cuda") - message(WARNING "") - message(WARNING " Build may fail with CUDA-related errors") - message(WARNING "") + message(WARNING + "\n" + "ERF: CUDA enabled but CUDA toolkit not detected\n" + " Expected environment variables not found:\n" + " - CUDA_HOME\n" + " - CUDATOOLKIT_HOME\n" + " - nvcc in PATH\n" + "\n" + " To fix:\n" + " module load cudatoolkit\n" + " Or on newer systems:\n" + " module load cuda\n" + "\n" + " Build may fail with CUDA-related errors\n" + "") erf_cray_verbose("CUDA_HOME = $ENV{CUDA_HOME}") erf_cray_verbose("CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") @@ -622,7 +624,14 @@ endif() # which enables GPU Transfer Library for direct GPU-GPU communication # SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags -if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) +# ============================================================================== +# Fix 4: GPU-aware MPI with Cray GTL (Checklist Item 4) +# ============================================================================== +# PROBLEM: GPU-aware MPI on Cray requires linking against mpi_gtl_cuda library +# which enables GPU Transfer Library for direct GPU-GPU communication +# SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags + +if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") message(STATUS "ERF: [Fix 4] Applying GPU-aware MPI fix (Cray GTL)") erf_cray_verbose("Problem: GPU-aware MPI needs Cray GTL 
libraries") @@ -630,42 +639,65 @@ if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED} erf_cray_verbose("Solution: Add -lmpi_gnu_123 -lmpi_gtl_cuda to link flags") erf_cray_verbose("MPICH_GPU_SUPPORT_ENABLED = $ENV{MPICH_GPU_SUPPORT_ENABLED}") - # Detect MPI library version for correct library name - # TODO: Could auto-detect the actual version instead of hardcoding _gnu_123 + # Set the MPI+GTL libraries + # Note: We use -lmpi_gnu_123 explicitly because Cray's --as-needed can drop it set(CRAY_MPI_LIBS "-lmpi_gnu_123 -lmpi_gtl_cuda") - erf_cray_verbose("Looking for mpi_gtl_cuda library...") + # Try to verify the library exists (for diagnostics) + set(MPI_LIB_SEARCH_PATHS "") if(DEFINED ENV{MPICH_DIR}) - erf_cray_verbose("MPICH_DIR = $ENV{MPICH_DIR}") + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") + endif() + if(DEFINED ENV{CRAY_MPICH_DIR}) + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") endif() - # Check if the libraries exist - find_library(CRAY_MPI_GTL_CUDA mpi_gtl_cuda HINTS ENV MPICH_DIR PATH_SUFFIXES lib) + erf_cray_verbose("Searching for mpi_gtl_cuda library in:") + foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) + erf_cray_verbose(" ${path}") + endforeach() + + find_library(CRAY_MPI_GTL_CUDA + NAMES mpi_gtl_cuda + HINTS ${MPI_LIB_SEARCH_PATHS} + NO_DEFAULT_PATH + ) if(CRAY_MPI_GTL_CUDA) - message(STATUS " Found Cray GPU-aware MPI library: ${CRAY_MPI_GTL_CUDA}") - erf_cray_verbose("Adding to CMAKE_CUDA_STANDARD_LIBRARIES") - erf_cray_verbose("Adding to CMAKE_CXX_STANDARD_LIBRARIES") - - set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) - set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) - - erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") - erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") + message(STATUS " Found GTL library: 
${CRAY_MPI_GTL_CUDA}") + erf_cray_verbose("Library verification successful") else() - message(WARNING "ERF: GPU-aware MPI requested but GTL libraries not found") - message(WARNING " Set MPICH_GPU_SUPPORT_ENABLED=1 and check MPICH_DIR") - message(WARNING " Manual workaround: -DCMAKE_CUDA_STANDARD_LIBRARIES=\"${CRAY_MPI_LIBS}\"") - message(WARNING " -DCMAKE_CXX_STANDARD_LIBRARIES=\"${CRAY_MPI_LIBS}\"") - erf_cray_verbose("Search paths: $ENV{MPICH_DIR}/lib") - erf_cray_verbose("Library not found: mpi_gtl_cuda") + message(STATUS " GTL library not found via find_library (will rely on linker search)") + erf_cray_verbose("Library not found in search paths, but linker may still find it") + erf_cray_verbose("This is normal if libraries are in non-standard Cray locations") endif() + + # Apply the fix regardless of whether find_library succeeded + # The Cray linker knows where to find these libraries + message(STATUS " Adding MPI+GTL libraries: ${CRAY_MPI_LIBS}") + erf_cray_verbose("Adding to CMAKE_CUDA_STANDARD_LIBRARIES") + erf_cray_verbose("Adding to CMAKE_CXX_STANDARD_LIBRARIES") + + set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + + erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") + else() if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI) - erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED not set") - erf_cray_verbose("Set MPICH_GPU_SUPPORT_ENABLED=1 to enable GPU-aware MPI") + if(NOT DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) + message(STATUS "") + message(STATUS " Note: MPICH_GPU_SUPPORT_ENABLED not set") + message(STATUS " For GPU-aware MPI, add to your script:") + message(STATUS " export MPICH_GPU_SUPPORT_ENABLED=1") + message(STATUS "") + erf_cray_verbose("Fix 4 not applied: 
MPICH_GPU_SUPPORT_ENABLED not set") + elseif(NOT "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") + erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED=$ENV{MPICH_GPU_SUPPORT_ENABLED} (not '1')") + endif() else() erf_cray_verbose("Fix 4 not needed (CUDA+MPI not both enabled)") endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index c96418ef2d..69a1814b21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,8 @@ cmake_minimum_required (VERSION 3.14 FATAL_ERROR) # Include Cray compiler detection BEFORE project() to set compilers -include(CMake/CrayCompilerDetection) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake") +include(CrayCompilerDetection) if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) project(ERF CXX C Fortran) From 42dcafc068fcebae5865370cd7a5b23998be6a09 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 16:14:36 -0800 Subject: [PATCH 10/44] Update printopts --- CMake/CrayCompilerDetection.cmake | 84 +++++++++++++++++++++++++++++++ CMake/CrayDetection.cmake | 33 +++++++----- 2 files changed, 104 insertions(+), 13 deletions(-) diff --git a/CMake/CrayCompilerDetection.cmake b/CMake/CrayCompilerDetection.cmake index ab4822def6..28b7096cd2 100644 --- a/CMake/CrayCompilerDetection.cmake +++ b/CMake/CrayCompilerDetection.cmake @@ -115,4 +115,88 @@ endif() if(DEFINED ENV{ONEAPI_ROOT} OR DEFINED ENV{I_MPI_ROOT}) message(STATUS " Detected Intel oneAPI environment") message(STATUS " SYCL will use CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") +endif() + +# ----------------------------------------------------------------------------- +# Detect Cray MPI and GTL Library Names (with smart fallbacks) +# ----------------------------------------------------------------------------- + +set(CRAY_MPI_LIB "") +set(CRAY_GTL_LIB "") + +# Method 1: Parse from CC --cray-print-opts=libs (BEST) +if(ERF_CRAY_CXX) + execute_process( + COMMAND ${ERF_CRAY_CXX} --cray-print-opts=libs + OUTPUT_VARIABLE CRAY_LIBS_OUTPUT + 
OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + RESULT_VARIABLE CRAY_LIBS_RESULT + ) + + if(CRAY_LIBS_RESULT EQUAL 0 AND CRAY_LIBS_OUTPUT) + string(REGEX MATCH "-lmpi_gnu_[0-9]+" CRAY_MPI_LIB "${CRAY_LIBS_OUTPUT}") + string(REGEX MATCH "-lmpi_gtl_[a-z]+" CRAY_GTL_LIB "${CRAY_LIBS_OUTPUT}") + endif() +endif() + +# Method 2: Fallback from environment variables +if(NOT CRAY_MPI_LIB AND DEFINED ENV{CRAY_MPICH_DIR}) + string(REGEX MATCH "/gnu/([0-9]+)\\.([0-9]+)" MATCH_RESULT "$ENV{CRAY_MPICH_DIR}") + if(CMAKE_MATCH_1 AND CMAKE_MATCH_2) + set(CRAY_MPI_LIB "-lmpi_gnu_${CMAKE_MATCH_1}${CMAKE_MATCH_2}") + endif() +endif() + +if(NOT CRAY_GTL_LIB AND DEFINED ENV{CRAY_ACCEL_TARGET}) + set(GTL_VAR "PE_MPICH_GTL_LIBS_$ENV{CRAY_ACCEL_TARGET}") + if(DEFINED ENV{${GTL_VAR}}) + set(CRAY_GTL_LIB "$ENV{${GTL_VAR}}") + elseif("$ENV{CRAY_ACCEL_TARGET}" MATCHES "nvidia") + set(CRAY_GTL_LIB "-lmpi_gtl_cuda") + elseif("$ENV{CRAY_ACCEL_TARGET}" MATCHES "amd") + set(CRAY_GTL_LIB "-lmpi_gtl_hsa") + endif() +endif() + +# Method 3: Ultimate fallback +if(NOT CRAY_MPI_LIB) + set(CRAY_MPI_LIB "-lmpi") +endif() + +# Combine +if(CRAY_MPI_LIB AND CRAY_GTL_LIB) + set(GTL_LIBS "${CRAY_MPI_LIB} ${CRAY_GTL_LIB}") +elseif(CRAY_MPI_LIB) + set(GTL_LIBS "${CRAY_MPI_LIB}") +endif() + +# ----------------------------------------------------------------------------- +# Set Minimal Flags for Compiler Tests (using detected libraries) +# ----------------------------------------------------------------------------- +message(STATUS "ERF: Setting minimal flags for compiler tests...") + +if(DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED} AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") + message(STATUS " GPU-aware MPI detected, adding CUDA runtime for tests") + message(STATUS " Detected libraries: ${GTL_LIBS}") + + # APPEND to linker flags + if(CMAKE_EXE_LINKER_FLAGS) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lcudart -lcuda" CACHE STRING "" FORCE) + else() + set(CMAKE_EXE_LINKER_FLAGS "-lcudart -lcuda" CACHE 
STRING "" FORCE) + endif() + + # APPEND to standard libraries (use DETECTED GTL_LIBS, not hardcoded!) + if(CMAKE_CXX_STANDARD_LIBRARIES) + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${GTL_LIBS}" CACHE STRING "" FORCE) + else() + set(CMAKE_CXX_STANDARD_LIBRARIES "${GTL_LIBS}" CACHE STRING "" FORCE) + endif() + + if(CMAKE_CUDA_STANDARD_LIBRARIES) + set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${GTL_LIBS}" CACHE STRING "" FORCE) + else() + set(CMAKE_CUDA_STANDARD_LIBRARIES "${GTL_LIBS}" CACHE STRING "" FORCE) + endif() endif() \ No newline at end of file diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index bf3641050f..4ee267d29f 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -710,23 +710,30 @@ endif() # PROBLEM 6: pkg-config may not find MPI/NetCDF without correct PKG_CONFIG_PATH # SOLUTION: Set up pkg-config path and add NetCDF/HDF5 directories to search -if(ERF_ENABLE_NETCDF AND DEFINED ENV{MPICH_DIR}) +if(ERF_ENABLE_NETCDF) message(STATUS "ERF: [Fix 5-6] Configuring NetCDF with Cray paths") - erf_cray_verbose("Problem 5: Cray NetCDF may have non-standard library names") - erf_cray_verbose("Problem 6: pkg-config needs MPICH_DIR in PKG_CONFIG_PATH") - erf_cray_verbose("Condition: ERF_ENABLE_NETCDF=ON and MPICH_DIR set") - erf_cray_verbose("Solution: Set PKG_CONFIG_PATH and add search paths") + # Get PKG_CONFIG_PATH directly from Cray wrapper + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} --cray-print-opts=PKG_CONFIG_PATH + OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + RESULT_VARIABLE PKG_RESULT + ) - # Add MPICH pkg-config path - set(PKG_CONFIG_PATH "$ENV{MPICH_DIR}/lib/pkgconfig") - if(DEFINED ENV{PKG_CONFIG_PATH}) - set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") - erf_cray_verbose("Appending to existing PKG_CONFIG_PATH") - else() - erf_cray_verbose("Creating new PKG_CONFIG_PATH") + if(PKG_RESULT EQUAL 0 AND 
CRAY_PKG_CONFIG_PATH) + message(STATUS " Setting PKG_CONFIG_PATH from Cray wrapper") + + # Append to existing PKG_CONFIG_PATH + if(DEFINED ENV{PKG_CONFIG_PATH}) + set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") + else() + set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}") + endif() + + message(STATUS " PKG_CONFIG_PATH = $ENV{PKG_CONFIG_PATH}") endif() - set(ENV{PKG_CONFIG_PATH} ${PKG_CONFIG_PATH}) message(STATUS " PKG_CONFIG_PATH = ${PKG_CONFIG_PATH}") erf_cray_verbose("This allows cmake/gnumake to find MPI and NetCDF via pkg-config") From 8a28bfc807ce5a833d037cb2601ab39fb5908cb9 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 7 Nov 2025 16:50:21 -0800 Subject: [PATCH 11/44] Add mpicxx hang workaround --- CMakeLists.txt | 96 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 69a1814b21..ecb9e71e62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,20 +203,71 @@ if(ERF_ENABLE_EKAT) endif() ########################### MPI ##################################### -##if(ERF_ENABLE_MPI) -## set(_mpi_comps C CXX) # Do we need MPI_C ? 
-## if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) -## list(APPEND _mpi_comps Fortran) -## endif() -## find_package(MPI REQUIRED ${_mpi_comps}) -### list(TRANSFORM _mpi_comps PREPEND "MPI::MPI_") -### foreach(D IN LISTS AMReX_SPACEDIM) -### target_link_libraries(amrex_${D}d PUBLIC ${_mpi_comps}) -### endforeach() -### unset(_mpi_comps) -##endif() - message(STATUS "mpi section done") +if(ERF_ENABLE_MPI) + # Check if we're on Cray with bare MPI wrappers (which will hang) + set(SKIP_MPI_DETECTION FALSE) + + if(DEFINED ENV{CRAYPE_VERSION} OR DEFINED ENV{CRAY_MPICH_DIR}) + # On Cray system - check if using problematic bare MPI wrappers + if(CMAKE_CXX_COMPILER MATCHES "mpicxx" OR + CMAKE_C_COMPILER MATCHES "mpicc" OR + CMAKE_Fortran_COMPILER MATCHES "mpifort") + message(STATUS "Detected bare MPI wrappers on Cray - skipping MPI detection (would hang)") + set(SKIP_MPI_DETECTION TRUE) + endif() + endif() + + if(SKIP_MPI_DETECTION) + # Workaround: Manual MPI setup (avoids hang) + message(STATUS "Manually configuring MPI (bypassing find_package)...") + + # Get Cray MPICH version for informational purposes + set(MPICH_VERSION "UNKNOWN") + if(DEFINED ENV{CRAY_MPICH_VERSION}) + set(MPICH_VERSION "$ENV{CRAY_MPICH_VERSION}") + elseif(DEFINED ENV{CRAY_MPICH_VER}) + set(MPICH_VERSION "$ENV{CRAY_MPICH_VER}") + endif() + + # Cray MPICH 8.x supports MPI 3.1 standard + set(MPI_VERSION "3.1") + + # Create MPI targets + if(NOT TARGET MPI::MPI_CXX) + add_library(MPI::MPI_CXX INTERFACE IMPORTED) + endif() + + if(NOT TARGET MPI::MPI_C) + add_library(MPI::MPI_C INTERFACE IMPORTED) + endif() + + # Set MPI variables + set(MPI_FOUND TRUE) + set(MPI_CXX_FOUND TRUE) + set(MPI_C_FOUND TRUE) + set(MPI_C_VERSION "${MPI_VERSION}") + set(MPI_CXX_VERSION "${MPI_VERSION}") + + message(STATUS " Cray MPICH implementation: ${MPICH_VERSION}") + message(STATUS " MPI API standard: ${MPI_VERSION}") + message(STATUS " Created MPI::MPI_CXX and MPI::MPI_C targets") + else() + # Normal path: Use find_package + 
message(STATUS "Using find_package(MPI) for detection...") + set(_mpi_comps C CXX) + if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) + list(APPEND _mpi_comps Fortran) + endif() + find_package(MPI REQUIRED ${_mpi_comps}) + endif() +endif() + +message(STATUS "MPI section complete") + ########################## NETCDF ################################## +message(STATUS "DEBUG: ERF_ENABLE_NETCDF = ${ERF_ENABLE_NETCDF}") +message(STATUS "DEBUG: ERF_ENABLE_TOOLS = ${ERF_ENABLE_TOOLS}") +message(STATUS "DEBUG: About to check NetCDF section...") if(ERF_ENABLE_NETCDF OR ERF_ENABLE_TOOLS) set(CMAKE_PREFIX_PATH ${NETCDF_DIR} ${CMAKE_PREFIX_PATH}) @@ -227,6 +278,8 @@ if(ERF_ENABLE_NETCDF OR ERF_ENABLE_TOOLS) endif() endif() +message(STATUS "DEBUG: NetCDF section complete") + ########################## NOAH-MP ################################## if(ERF_ENABLE_NOAHMP) @@ -237,6 +290,7 @@ if(ERF_ENABLE_NOAHMP) else() message(FATAL_ERROR "Noah-MP requires NetCDF be enabled") endif() +message(STATUS "DEBUG: NOAH-MP section complete") endif() ########################### RRTMGP ################################# @@ -260,6 +314,7 @@ if(ERF_ENABLE_RRTMGP) set(RRTMGP_ENABLE_KOKKOS TRUE) target_compile_definitions(rrtmgp PUBLIC RRTMGP_ENABLE_KOKKOS) target_link_libraries(rrtmgp kokkos) +message(STATUS "DEBUG: RRTMGP section complete") endif() ########################### SHOC ################################# @@ -270,24 +325,11 @@ if(ERF_ENABLE_SHOC) endif() # NOTE: We compile shoc src files directly - +message(STATUS "DEBUG: SHOC section complete") endif() ########################### ERF ##################################### -if(ERF_ENABLE_MPI) - set(_mpi_comps C CXX) # Do we need MPI_C ? 
- if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) - list(APPEND _mpi_comps Fortran) - endif() - find_package(MPI REQUIRED ${_mpi_comps}) -# list(TRANSFORM _mpi_comps PREPEND "MPI::MPI_") -# foreach(D IN LISTS AMReX_SPACEDIM) -# target_link_libraries(amrex_${D}d PUBLIC ${_mpi_comps}) -# endforeach() -# unset(_mpi_comps) -endif() - # General information about machine, compiler, and build type message(STATUS "ERF Information:") message(STATUS "CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}") From b560e0e3e20d99b08f01f8ef7913757bb1f09b5a Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Sat, 8 Nov 2025 04:52:46 -0800 Subject: [PATCH 12/44] Make distclean broader --- .gitignore | 38 +++++++++++++++ Build/wrapper_clean_build.sh | 78 +++++++++++++++++++++++++++++++ Build/wrapper_clean_build_auto.sh | 56 ++++++++++++++++++++-- 3 files changed, 168 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 880d4460cd..39ec67b33e 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,41 @@ cmake*ser* cmake*par* .idea *.ipynb + +# CMake build artifacts +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake +CTestTestfile.cmake +DartConfiguration.tcl +Makefile +compile_commands.json +Testing/ +_deps/ + +# Project-specific generated files +ERFConfig.cmake +*.pc +git-state.txt + +# Build outputs +lib*.a +lib*.so +bin/ +Exec/ +Submodules/ +Tests/ +cmake_packages/ +externals/ + +# Build and install directories +build_*/ +install_*/ + +# Log files +build_*.log + +# Editor backups +*~ +\#*\# +.#* \ No newline at end of file diff --git a/Build/wrapper_clean_build.sh b/Build/wrapper_clean_build.sh index b647e76dc5..fb53a06427 100755 --- a/Build/wrapper_clean_build.sh +++ b/Build/wrapper_clean_build.sh @@ -113,6 +113,76 @@ fi [ -d "_deps" ] && FILES_TO_DELETE="$FILES_TO_DELETE _deps/" [ -f "compile_commands.json" ] && FILES_TO_DELETE="$FILES_TO_DELETE compile_commands.json" +# === Add after initial FILES_TO_DELETE setup === + +# Built artifact directories (these are built, not 
source) +for d in Exec Submodules Tests bin cmake_packages externals; do + [ -d "$d" ] && FILES_TO_DELETE="$FILES_TO_DELETE $d/" +done + +# CTest artifacts +[ -f "DartConfiguration.tcl" ] && FILES_TO_DELETE="$FILES_TO_DELETE DartConfiguration.tcl" + +# Generated project config +[ -f "ERFConfig.cmake" ] && FILES_TO_DELETE="$FILES_TO_DELETE ERFConfig.cmake" + +# pkg-config files +find . -maxdepth 1 -name "*.pc" -type f 2>/dev/null | while read -r f; do + FILES_TO_DELETE="$FILES_TO_DELETE $f" +done + +# Build artifacts (libraries) +find . -maxdepth 1 \( -name "lib*.a" -o -name "lib*.so" \) -type f 2>/dev/null | while read -r f; do + FILES_TO_DELETE="$FILES_TO_DELETE $f" +done + +# Build logs +for f in build_*.log git-state.txt; do + [ -f "$f" ] && FILES_TO_DELETE="$FILES_TO_DELETE $f" +done + +# === Check for install directory from CMakeCache.txt === +INSTALL_DIR="" +if [ -f "CMakeCache.txt" ]; then + # Extract CMAKE_INSTALL_PREFIX from cache + INSTALL_PREFIX=$(grep "^CMAKE_INSTALL_PREFIX:" CMakeCache.txt | cut -d'=' -f2) + + if [ -n "$INSTALL_PREFIX" ] && [ -d "$INSTALL_PREFIX" ]; then + # Convert to absolute path for comparison + INSTALL_DIR=$(cd "$INSTALL_PREFIX" 2>/dev/null && pwd || echo "$INSTALL_PREFIX") + + # Check if it's a subdirectory of current directory (local install) + CURRENT_DIR=$(pwd) + if [[ "$INSTALL_DIR" == "$CURRENT_DIR"/* ]]; then + # It's a local install directory + INSTALL_DIR_RELATIVE=$(realpath --relative-to="$CURRENT_DIR" "$INSTALL_DIR" 2>/dev/null || \ + python3 -c "import os.path; print(os.path.relpath('$INSTALL_DIR', '$CURRENT_DIR'))" 2>/dev/null || \ + echo "$INSTALL_DIR") + + echo "" + echo "==========================================" + echo "Install Directory Detected" + echo "==========================================" + echo "This build is configured to install to:" + echo " $INSTALL_DIR_RELATIVE" + echo "" + echo "This directory contains installed artifacts and is separate" + echo "from the build configuration (distclean does NOT 
remove it)." + echo "" + + if [ -d "$INSTALL_DIR" ]; then + read -p "Also remove install directory? [y/N] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + CLEAN_INSTALL_DIR="$INSTALL_DIR" + fi + fi + fi + fi +fi + +# === Then in the deletion section, after FILES_TO_DELETE cleanup === + # If there's nothing to clean, just run the script if [ -z "$FILES_TO_DELETE" ]; then echo "Directory is already clean, proceeding with build..." @@ -168,6 +238,14 @@ else rm -f "$f" && echo " ✓ Deleted file: $f" fi done + + # Clean install directory if requested + if [ -n "$CLEAN_INSTALL_DIR" ]; then + echo "" + echo "Removing install directory..." + rm -rf "$CLEAN_INSTALL_DIR" && echo " ✓ Deleted: $CLEAN_INSTALL_DIR" + fi + echo "" echo "Distclean complete. Ready for fresh configuration." echo "" diff --git a/Build/wrapper_clean_build_auto.sh b/Build/wrapper_clean_build_auto.sh index a3f757555a..3e024a23c6 100755 --- a/Build/wrapper_clean_build_auto.sh +++ b/Build/wrapper_clean_build_auto.sh @@ -12,9 +12,12 @@ set -e # ============================================================================ SCRIPT=$1 +CLEAN_INSTALL=${2:-no} # Optional: "yes" to also clean install dir if [ -z "$SCRIPT" ]; then echo "ERROR: No build script provided" + echo "Usage: $0 [clean_install]" + echo " clean_install: 'yes' to also remove CMAKE_INSTALL_PREFIX (default: no)" exit 1 fi @@ -27,15 +30,60 @@ echo "==========================================" echo "AUTO MODE: Performing distclean" echo "==========================================" echo "Deleting CMake configuration and build artifacts..." 
-echo "(Install directories NOT affected)" echo "" -# Delete all CMake artifacts (distclean equivalent) +# Check for install directory before deleting CMakeCache.txt +INSTALL_DIR="" +if [ -f "CMakeCache.txt" ] && [ "$CLEAN_INSTALL" = "yes" ]; then + INSTALL_PREFIX=$(grep "^CMAKE_INSTALL_PREFIX:" CMakeCache.txt 2>/dev/null | cut -d'=' -f2 || true) + if [ -n "$INSTALL_PREFIX" ] && [ -d "$INSTALL_PREFIX" ]; then + INSTALL_DIR="$INSTALL_PREFIX" + echo "Install directory found: $INSTALL_DIR" + fi +fi + +# Delete CMake configuration files rm -rf CMakeCache.txt CMakeFiles/ Makefile cmake_install.cmake \ CTestTestfile.cmake Testing/ _deps/ compile_commands.json \ - *.cmake 2>/dev/null || true + 2>/dev/null || true + +# Delete CTest/CDash artifacts +rm -f DartConfiguration.tcl 2>/dev/null || true + +# Delete generated project config +rm -f ERFConfig.cmake 2>/dev/null || true + +# Delete any remaining .cmake files (excluding source CMakeLists.txt) +find . -maxdepth 1 -name "*.cmake" -type f -exec rm -f {} \; 2>/dev/null || true + +# Delete pkg-config files +rm -f *.pc 2>/dev/null || true -echo "✓ Cleaned: CMakeCache.txt, CMakeFiles/, Makefile, *.cmake, etc." 
+# Delete built artifact directories +rm -rf Exec/ Submodules/ Tests/ bin/ cmake_packages/ externals/ 2>/dev/null || true + +# Delete built libraries +rm -f lib*.a lib*.so 2>/dev/null || true + +# Delete build logs +rm -f build_*.log git-state.txt 2>/dev/null || true + +echo "✓ Cleaned: CMake configuration files" +echo "✓ Cleaned: Build artifacts (Exec/, Submodules/, Tests/, bin/, etc.)" +echo "✓ Cleaned: Libraries (lib*.a, lib*.so)" +echo "✓ Cleaned: Build logs and editor backups" + +# Clean install directory if requested +if [ -n "$INSTALL_DIR" ]; then + echo "" + echo "Cleaning install directory (CLEAN_INSTALL=yes): $INSTALL_DIR" + rm -rf "$INSTALL_DIR" && echo "✓ Deleted: $INSTALL_DIR" +elif [ -f "CMakeCache.txt.deleted" ]; then + echo "" + echo "Install directory NOT cleaned (use CLEAN_INSTALL=yes to clean)" +fi + +echo "" echo "✓ Directory ready for fresh configuration" echo "" From 3963cb6d679c24ff9084e31d95370d4d3194c273 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Sat, 8 Nov 2025 05:11:38 -0800 Subject: [PATCH 13/44] Add distclean target --- CMake/UtilityTargets.cmake | 60 ++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 1 + 2 files changed, 61 insertions(+) create mode 100644 CMake/UtilityTargets.cmake diff --git a/CMake/UtilityTargets.cmake b/CMake/UtilityTargets.cmake new file mode 100644 index 0000000000..295ae37b3f --- /dev/null +++ b/CMake/UtilityTargets.cmake @@ -0,0 +1,60 @@ +add_custom_target(distclean + # Header + COMMAND ${CMAKE_COMMAND} -E echo "==========================================" + COMMAND ${CMAKE_COMMAND} -E echo "Distclean: ${CMAKE_BINARY_DIR}" + COMMAND ${CMAKE_COMMAND} -E echo "==========================================" + + # CMake configuration files (generated by cmake during configuration) + COMMAND ${CMAKE_COMMAND} -E remove -f + CMakeCache.txt # Main CMake cache + cmake_install.cmake # Install script + Makefile # Generated Makefile (if using Make generator) + + # CTest files (generated by enable_testing() or 
ctest) + COMMAND ${CMAKE_COMMAND} -E remove -f + CTestTestfile.cmake # CTest configuration + DartConfiguration.tcl # CDash/Dart configuration + + # Project-specific generated files + COMMAND ${CMAKE_COMMAND} -E remove -f + ERFConfig.cmake # Generated by configure_file() or export() + compile_commands.json # Generated by CMAKE_EXPORT_COMPILE_COMMANDS + git-state.txt # Custom git state tracking + + # CMake-generated directories + COMMAND ${CMAKE_COMMAND} -E remove_directory + CMakeFiles # CMake build system files + Testing # CTest output + _deps # FetchContent dependencies + + # Build output directories (project-specific) + COMMAND ${CMAKE_COMMAND} -E remove_directory + Exec # Built executables + Submodules # Built submodule artifacts + Tests # Built test executables + bin # Binary output directory + cmake_packages # CMake package configs + externals # External library builds + + # pkg-config files (generated by configure_file() for *.pc.in) + COMMAND ${CMAKE_COMMAND} -E echo "Removing pkg-config files..." + COMMAND find . -maxdepth 1 -name "*.pc" -type f -delete 2>/dev/null || true + + # Built libraries (linker outputs) + COMMAND ${CMAKE_COMMAND} -E echo "Removing built libraries..." + COMMAND find . -maxdepth 1 -name "lib*.a" -type f -delete 2>/dev/null || true + COMMAND find . -maxdepth 1 -name "lib*.so" -type f -delete 2>/dev/null || true + + # Build logs (custom logging from build scripts - optional) + COMMAND ${CMAKE_COMMAND} -E echo "Removing build logs..." + COMMAND find . 
-maxdepth 1 -name "build_*.log" -type f -delete 2>/dev/null || true + + # Summary + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "✓ Distclean complete" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Note: Install directories preserved" + + COMMENT "Removing all CMake configuration and build artifacts" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} +) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index ecb9e71e62..e716b4fee2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ include(CMakePackageConfigHelpers) # Include Cray/Perlmutter auto-detection and fixes include(CrayDetection) +include(UtilityTargets) ########################## OPTIONS ##################################### From 887f513c136244da8fda1bd9a4ea41d92d600962 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Sat, 8 Nov 2025 05:37:18 -0800 Subject: [PATCH 14/44] Add make uninstall for cmake --- CMake/UtilityTargets.cmake | 13 +++++++++++++ CMake/cmake_uninstall.cmake.in | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 CMake/cmake_uninstall.cmake.in diff --git a/CMake/UtilityTargets.cmake b/CMake/UtilityTargets.cmake index 295ae37b3f..e75380bcf2 100644 --- a/CMake/UtilityTargets.cmake +++ b/CMake/UtilityTargets.cmake @@ -1,3 +1,16 @@ +# Add uninstall target +if(NOT TARGET uninstall) + configure_file( + "${CMAKE_SOURCE_DIR}/CMake/cmake_uninstall.cmake.in" + "${CMAKE_BINARY_DIR}/cmake_uninstall.cmake" + IMMEDIATE @ONLY) + + add_custom_target(uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}/cmake_uninstall.cmake + COMMENT "Uninstalling files listed in install_manifest.txt" + ) +endif() + add_custom_target(distclean # Header COMMAND ${CMAKE_COMMAND} -E echo "==========================================" diff --git a/CMake/cmake_uninstall.cmake.in b/CMake/cmake_uninstall.cmake.in new file mode 100644 index 0000000000..1349686744 --- /dev/null +++ 
b/CMake/cmake_uninstall.cmake.in @@ -0,0 +1,24 @@ +if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") + message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") +endif() + +file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) +string(REGEX REPLACE "\n" ";" files "${files}") + +foreach(file ${files}) + message(STATUS "Uninstalling $ENV{DESTDIR}${file}") + if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") + exec_program( + "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" + OUTPUT_VARIABLE rm_out + RETURN_VALUE rm_retval + ) + if(NOT "${rm_retval}" STREQUAL 0) + message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") + endif() + else() + message(STATUS "File $ENV{DESTDIR}${file} does not exist.") + endif() +endforeach() + +message(STATUS "Uninstall complete") \ No newline at end of file From 2cd31f70e71f23097d17d040b923483257aa3aa0 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Sat, 8 Nov 2025 05:45:00 -0800 Subject: [PATCH 15/44] Update distclean and uninstall --- CMake/UtilityTargets.cmake | 15 ++++++++++++--- CMake/cmake_uninstall.cmake.in | 11 ++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/CMake/UtilityTargets.cmake b/CMake/UtilityTargets.cmake index e75380bcf2..1664a3a87e 100644 --- a/CMake/UtilityTargets.cmake +++ b/CMake/UtilityTargets.cmake @@ -11,17 +11,25 @@ if(NOT TARGET uninstall) ) endif() +# Add distclean target add_custom_target(distclean # Header - COMMAND ${CMAKE_COMMAND} -E echo "==========================================" + COMMAND ${CMAKE_COMMAND} -E echo "==================================================================================" COMMAND ${CMAKE_COMMAND} -E echo "Distclean: ${CMAKE_BINARY_DIR}" - COMMAND ${CMAKE_COMMAND} -E echo "==========================================" + COMMAND ${CMAKE_COMMAND} -E echo "==================================================================================" # CMake configuration 
files (generated by cmake during configuration) COMMAND ${CMAKE_COMMAND} -E remove -f CMakeCache.txt # Main CMake cache cmake_install.cmake # Install script + cmake_uninstall.cmake # Generated uninstall script Makefile # Generated Makefile (if using Make generator) + install_manifest.txt # List of installed files + + # CPack files (generated by CPack for packaging) + COMMAND ${CMAKE_COMMAND} -E remove -f + CPackConfig.cmake # CPack configuration + CPackSourceConfig.cmake # CPack source package configuration # CTest files (generated by enable_testing() or ctest) COMMAND ${CMAKE_COMMAND} -E remove -f @@ -46,6 +54,7 @@ add_custom_target(distclean Submodules # Built submodule artifacts Tests # Built test executables bin # Binary output directory + erf_srclib # ERF source library output cmake_packages # CMake package configs externals # External library builds @@ -58,7 +67,7 @@ add_custom_target(distclean COMMAND find . -maxdepth 1 -name "lib*.a" -type f -delete 2>/dev/null || true COMMAND find . -maxdepth 1 -name "lib*.so" -type f -delete 2>/dev/null || true - # Build logs (custom logging from build scripts - optional) + # Build logs (custom logging from build scripts) COMMAND ${CMAKE_COMMAND} -E echo "Removing build logs..." COMMAND find . 
-maxdepth 1 -name "build_*.log" -type f -delete 2>/dev/null || true diff --git a/CMake/cmake_uninstall.cmake.in b/CMake/cmake_uninstall.cmake.in index 1349686744..830a6c6230 100644 --- a/CMake/cmake_uninstall.cmake.in +++ b/CMake/cmake_uninstall.cmake.in @@ -8,13 +8,14 @@ string(REGEX REPLACE "\n" ";" files "${files}") foreach(file ${files}) message(STATUS "Uninstalling $ENV{DESTDIR}${file}") if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") - exec_program( - "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" + execute_process( + COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}" + RESULT_VARIABLE rm_retval OUTPUT_VARIABLE rm_out - RETURN_VALUE rm_retval + ERROR_VARIABLE rm_err ) - if(NOT "${rm_retval}" STREQUAL 0) - message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") + if(NOT "${rm_retval}" STREQUAL "0") + message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}: ${rm_err}") endif() else() message(STATUS "File $ENV{DESTDIR}${file} does not exist.") From a05960622584f1d926488df0715203065e354363 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Sat, 8 Nov 2025 06:27:40 -0800 Subject: [PATCH 16/44] Add ERF_DIR detection to shoc script --- .../build_erf_with_shoc_cuda_Perlmutter.sh | 91 +++++++++++++++++-- 1 file changed, 83 insertions(+), 8 deletions(-) mode change 100644 => 100755 Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh diff --git a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh old mode 100644 new mode 100755 index 1bf15c1b18..448cfb47c1 --- a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh +++ b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh @@ -2,18 +2,93 @@ set -e set -o pipefail -# 1. 
Resolve ERF_DIR to the repo root (one level up from this script) -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -export ERF_DIR="$(dirname "$SCRIPT_DIR")" +# Function to verify if a directory is the ERF repo root +verify_erf_dir() { + local dir=$1 + + # Check for basic structure + if [ ! -f "$dir/CMakeLists.txt" ] || [ ! -d "$dir/Source" ]; then + return 1 + fi + + # Check for "Energy Research and Forecasting" in key files + local found=0 + + if [ -f "$dir/README.rst" ]; then + if grep -q "Energy Research and Forecasting" "$dir/README.rst" 2>/dev/null; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/LICENSE.md" ]; then + if grep -q "Energy Research and Forecasting" "$dir/LICENSE.md" 2>/dev/null; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/CITATION.cff" ]; then + if grep -q "Energy Research and Forecasting" "$dir/CITATION.cff" 2>/dev/null; then + found=1 + fi + fi + + return $((1 - found)) +} -echo "ERF_DIR set to: $ERF_DIR" +# Function to find ERF repo root with multiple fallbacks +find_erf_dir() { + # Method 1: Use git to find repo root + if command -v git &> /dev/null; then + if git rev-parse --is-inside-work-tree &> /dev/null 2>&1; then + local git_root="$(git rev-parse --show-toplevel)" + if verify_erf_dir "$git_root"; then + ERF_DIR="$git_root" + echo "Detected ERF_DIR from git: $ERF_DIR" + return 0 + fi + fi + fi + + # Method 2: Try going up from script location + local script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + # Script is in Build/Perlmutter/, so go up 2 levels + local candidate="$(cd "$script_dir/../.." 
&& pwd)" + if verify_erf_dir "$candidate"; then + ERF_DIR="$candidate" + echo "Detected ERF_DIR from script location: $ERF_DIR" + return 0 + fi + + # Method 3: Check current directory + if verify_erf_dir "$PWD"; then + ERF_DIR="$PWD" + echo "Detected ERF_DIR from current directory: $ERF_DIR" + return 0 + fi + + echo "Error: Could not auto-detect ERF_DIR" + echo "Verification requires:" + echo " - CMakeLists.txt and Source/ directory" + echo " - 'Energy Research and Forecasting' in README.rst, LICENSE.md, or CITATION.cff" + return 1 +} + +################################################################################### + +# 1. Resolve ERF_DIR +# Detect ERF_DIR +if ! find_erf_dir; then + exit 1 +fi + +export ERF_DIR E3SM_DIR="$ERF_DIR/external/E3SM" if [ ! -d "$E3SM_DIR" ]; then -echo "external/E3SM folder not found, running eamxx_clone.sh..." -source "$ERF_DIR/Build/GNU_Ekat/eamxx_clone.sh" + echo "external/E3SM folder not found, running eamxx_clone.sh..." + source "$ERF_DIR/Build/GNU_Ekat/eamxx_clone.sh" else -echo "external/E3SM folder already exists, skipping clone." + echo "external/E3SM folder already exists, skipping clone." fi # 3. Prepare build directory @@ -24,6 +99,6 @@ cp "$ERF_DIR/Build/Perlmutter/cmake_with_cuda_shoc_Perlmutter.sh" "$ERF_DIR/buil # 4. Move into build directory cd "$ERF_DIR/build" -# 5. Run cmake setup +# Run cmake setup echo "Running cmake_with_cuda_shoc_Perlmutter.sh..." source cmake_with_cuda_shoc_Perlmutter.sh From c98ab616fca2d948041bfb6b97310da87007c645 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Mon, 10 Nov 2025 10:48:57 -0800 Subject: [PATCH 17/44] Add machines --- Build/machines/aurora_erf.profile | 8 ++++++++ Build/machines/perlmutter_erf.profile | 4 ++++ Build/machines/polaris_erf.profile | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 Build/machines/aurora_erf.profile create mode 100644 Build/machines/perlmutter_erf.profile create mode 100644 Build/machines/polaris_erf.profile diff --git a/Build/machines/aurora_erf.profile b/Build/machines/aurora_erf.profile new file mode 100644 index 0000000000..8a4ada731f --- /dev/null +++ b/Build/machines/aurora_erf.profile @@ -0,0 +1,8 @@ +# required dependencies +module load cmake + +module load hdf5/1.14.6 +module load netcdf-cxx4 + +# necessary to use build or run with GPU-aware MPICH +# export MPIR_CVAR_ENABLE_GPU=1 \ No newline at end of file diff --git a/Build/machines/perlmutter_erf.profile b/Build/machines/perlmutter_erf.profile new file mode 100644 index 0000000000..29ebbda84b --- /dev/null +++ b/Build/machines/perlmutter_erf.profile @@ -0,0 +1,4 @@ +module load gcc-native/13.2 cmake cudatoolkit cray-hdf5-parallel cray-netcdf-hdf5parallel cray-libsci + +# Automatically included with module load gpu +# export MPICH_GPU_SUPPORT_ENABLED=1 \ No newline at end of file diff --git a/Build/machines/polaris_erf.profile b/Build/machines/polaris_erf.profile new file mode 100644 index 0000000000..6502c320e8 --- /dev/null +++ b/Build/machines/polaris_erf.profile @@ -0,0 +1,25 @@ +# swap to the Milan cray package +module load craype-x86-milan + +# extra modules +module use /soft/modulefiles +module load spack-pe-gnu + +# add cuda +module load cuda/12.6 +module load cudatoolkit-standalone/12.6 +module load craype-accel-nvidia80 + +# required dependencies +module load cmake + +# default gcc-native too new for cuda/12.6 +module load gcc-native/13.2 + +module load cray-hdf5-parallel +module load cray-libsci/25.03.0 + + +module load cray-netcdf-hdf5parallel + +# export 
MPICH_GPU_SUPPORT_ENABLED=1 \ No newline at end of file From b1c2314ffaf78c8c404b7dbca6a562473634908f Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Mon, 10 Nov 2025 10:56:29 -0800 Subject: [PATCH 18/44] Add script with no flags --- Build/cmake_with_kokkos_many.sh | 23 +++++++++++++++++++++++ Build/machines/perlmutter_erf.profile | 10 ++++++++++ 2 files changed, 33 insertions(+) create mode 100755 Build/cmake_with_kokkos_many.sh diff --git a/Build/cmake_with_kokkos_many.sh b/Build/cmake_with_kokkos_many.sh new file mode 100755 index 0000000000..1055fe7176 --- /dev/null +++ b/Build/cmake_with_kokkos_many.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +#Example cmake configuration script that assumes cray detection + +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_FFT:BOOL=ON \ + -DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + -DERF_ENABLE_RRTMGP:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=ON \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -B build_erf .. 
+ +cmake --build build_erf -j10 -v +cmake --install build_erf --prefix=install_erf diff --git a/Build/machines/perlmutter_erf.profile b/Build/machines/perlmutter_erf.profile index 29ebbda84b..c60418c6b8 100644 --- a/Build/machines/perlmutter_erf.profile +++ b/Build/machines/perlmutter_erf.profile @@ -1,4 +1,14 @@ +#!/bin/bash + module load gcc-native/13.2 cmake cudatoolkit cray-hdf5-parallel cray-netcdf-hdf5parallel cray-libsci +#module load gcc-native/13.2 +#module load cray-mpich/8.1.30 +#module load cray-hdf5-parallel/1.14.3.1 +#module load cray-netcdf-hdf5parallel/4.9.0.13 +#module load cmake/3.30.2 +#module load cray-libsci/24.07.0 +#module load cray-parallel-netcdf/1.12.3.13 + # Automatically included with module load gpu # export MPICH_GPU_SUPPORT_ENABLED=1 \ No newline at end of file From 2f48a0872896a1a755545f69f2cf26459e63c909 Mon Sep 17 00:00:00 2001 From: Akash Dhruv Date: Mon, 10 Nov 2025 15:54:07 -0600 Subject: [PATCH 19/44] Documentation updates (#2703) --- Docs/sphinx_doc/CouplingToNoahMP.rst | 16 ++++++++-------- .../Noah-MP/prompts/noahmpio_update.toml | 2 +- Submodules/Noah-MP | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Docs/sphinx_doc/CouplingToNoahMP.rst b/Docs/sphinx_doc/CouplingToNoahMP.rst index 45067dac17..30657837e6 100644 --- a/Docs/sphinx_doc/CouplingToNoahMP.rst +++ b/Docs/sphinx_doc/CouplingToNoahMP.rst @@ -126,8 +126,8 @@ model (e.g., OpenAI, Argo, etc.). Tutorials are available at .. code-block:: bash code-scribe update NoahmpIO.H NoahmpIO.cpp NoahmpIO_fi.F90 \ - -r prompts/noahmpio_update.toml \ - -p "Write a natural language prompt with variable names, dimensions, etc." \ + -p prompts/noahmpio_update.toml \ + -q "Write a natural language prompt with variable names, dimensions, etc." \ -m 3. Run the following to generate or update bindings in **Source/LandSurfaceModel/Noah-MP** directory: @@ -135,8 +135,8 @@ model (e.g., OpenAI, Argo, etc.). Tutorials are available at .. 
code-block:: bash code-scribe update ERF_NOAHMP.cpp \ - -r prompts/noahmpio_update.toml \ - -p "Write a natural language prompt with variable names, dimensions, etc." \ + -p prompts/noahmpio_update.toml \ + -q "Write a natural language prompt with variable names, dimensions, etc." \ -m You may need to manually edit **Submodules/Noah-MP/drivers/erf/NoahmpIOVarType.F90** to replace: @@ -157,8 +157,8 @@ model’s context length) using: .. code-block:: bash code-scribe update NoahmpIOVarType.F90 \ - -r prompts/noahmpio_update.toml \ - -p "Write a natural language prompt with variable names, dimensions, etc." \ + -p prompts/noahmpio_update.toml \ + -q "Write a natural language prompt with variable names, dimensions, etc." \ -m If you want to control Noah-MP plot variables, you can update **Submodules/Noah-MP/drivers/erf/NoahmpWriteLandMod.F90** file: @@ -166,6 +166,6 @@ If you want to control Noah-MP plot variables, you can update **Submodules/Noah- .. code-block:: bash code-scribe update NoahmpWriteLandMod.F90 \ - -r prompts/noahmpwriteland_update.toml \ - -p "Write a natural language prompt with variable names, dimensions, etc." \ + -p prompts/noahmpwriteland_update.toml \ + -q "Write a natural language prompt with variable names, dimensions, etc." \ -m diff --git a/Source/LandSurfaceModel/Noah-MP/prompts/noahmpio_update.toml b/Source/LandSurfaceModel/Noah-MP/prompts/noahmpio_update.toml index c8c0501973..cdc6ef230c 100644 --- a/Source/LandSurfaceModel/Noah-MP/prompts/noahmpio_update.toml +++ b/Source/LandSurfaceModel/Noah-MP/prompts/noahmpio_update.toml @@ -1,6 +1,6 @@ # Usage: code-scribe update ERF_NOAHMP.cpp \ # -p prompts/noahmpio_update.toml \ -# -p "Write a natural language prompt with variable name dimension etc." \ +# -q "Write a natural language prompt with variable name dimension etc." 
\ # -m [[chat.user]] diff --git a/Submodules/Noah-MP b/Submodules/Noah-MP index 128b02d0d4..6dc16027de 160000 --- a/Submodules/Noah-MP +++ b/Submodules/Noah-MP @@ -1 +1 @@ -Subproject commit 128b02d0d4ffbe6d69b799f65bf670d1e9966d7b +Subproject commit 6dc16027de56f2abc9fe430016acd086625e9ae2 From 0115dacf86bbef61e7e98fab6525433b3e304e02 Mon Sep 17 00:00:00 2001 From: Soonpil Kang <109235650+skang67@users.noreply.github.com> Date: Mon, 10 Nov 2025 14:43:32 -0800 Subject: [PATCH 20/44] Use cell-centered grid for EB area fraction and face centroid (#2702) * Apply FillBoundary for momenta in slow_rhs_post. * check on domain boundaries not box boundaries * replace bx lo/hi by domain lo/hi * revert incorrect change in last commit * fix test on small cells * Added wrappers for non-const EB factory members. * Use cell-centered grids for eb_aux_ area fraction and face centroids. * Corrected rayleigh damping thickness in inputs_FittedMesh. * Remove FB from slow_rhs_post, since we do this in erf_slow_rhs_pre. --------- Co-authored-by: Ann Almgren --- .../WitchOfAgnesi/inputs_FittedMesh | 4 +- Source/EB/ERF_EB.H | 40 ++++++++++++++++++- Source/EB/ERF_EBAux.cpp | 39 ++++++++---------- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/Exec/DryRegTests/WitchOfAgnesi/inputs_FittedMesh b/Exec/DryRegTests/WitchOfAgnesi/inputs_FittedMesh index 7010714e1b..cea1375759 100644 --- a/Exec/DryRegTests/WitchOfAgnesi/inputs_FittedMesh +++ b/Exec/DryRegTests/WitchOfAgnesi/inputs_FittedMesh @@ -64,8 +64,8 @@ erf.abl_driver_type = "PressureGradient" erf.abl_pressure_grad = -0.02 0. 0. 
erf.rayleigh_damp_W = true -erf.rayleigh_zdamp = 5000.0 -erf.rayleigh_dampcoef = 0.2 +erf.rayleigh_zdamp = 50.0 +erf.rayleigh_dampcoef = 0.25 #erf.init_type = "input_sounding" #erf.init_sounding_ideal = true diff --git a/Source/EB/ERF_EB.H b/Source/EB/ERF_EB.H index fd6d240756..28aab3491c 100644 --- a/Source/EB/ERF_EB.H +++ b/Source/EB/ERF_EB.H @@ -99,8 +99,44 @@ class eb_ { inline amrex::FabArray& getNonConstEBCellFlags(const amrex::EBFArrayBoxFactory& ebfact) { - const amrex::FabArray& flags_const = ebfact.getMultiEBCellFlagFab(); - return const_cast&>(flags_const); + const amrex::FabArray& flags_const = ebfact.getMultiEBCellFlagFab(); + return const_cast&>(flags_const); + } + + inline amrex::MultiFab& + getNonConstVolFrac(const amrex::EBFArrayBoxFactory& ebfact) + { + const amrex::MultiFab& vfrac_const = ebfact.getVolFrac(); + return const_cast(vfrac_const); + } + + inline amrex::MultiCutFab& + getNonConstCentroid(const amrex::EBFArrayBoxFactory& ebfact) + { + const amrex::MultiCutFab& vcent_const = ebfact.getCentroid(); + return const_cast(vcent_const); + } + + inline amrex::Array + getNonConstAreaFrac(const amrex::EBFArrayBoxFactory& ebfact) + { + auto afrac_const = ebfact.getAreaFrac(); + amrex::Array afrac; + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + afrac[dir] = const_cast(afrac_const[dir]); + } + return afrac; + } + + inline amrex::Array + getNonConstFaceCent(const amrex::EBFArrayBoxFactory& ebfact) + { + auto fcent_const = ebfact.getFaceCent(); + amrex::Array fcent; + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + fcent[dir] = const_cast(fcent_const[dir]); + } + return fcent; } }; diff --git a/Source/EB/ERF_EBAux.cpp b/Source/EB/ERF_EBAux.cpp index 584a547c05..5922d3e516 100644 --- a/Source/EB/ERF_EBAux.cpp +++ b/Source/EB/ERF_EBAux.cpp @@ -53,14 +53,8 @@ define( [[maybe_unused]] int const& a_level, m_volcent = new MultiFab(my_grids, a_dmap, AMREX_SPACEDIM, a_ngrow[2], MFInfo(), FArrayBoxFactory()); for (int idim = 0; idim < AMREX_SPACEDIM; 
++idim) { - const BoxArray& faceba = amrex::convert(a_grids, IntVect::TheDimensionVector(idim)); - if (idim == a_idim) { - m_areafrac[idim] = new MultiFab(a_grids, a_dmap, 1, a_ngrow[1]+1, MFInfo(), FArrayBoxFactory()); - m_facecent[idim] = new MultiFab(a_grids, a_dmap, AMREX_SPACEDIM-1, a_ngrow[2], MFInfo(), FArrayBoxFactory()); - } else { - m_areafrac[idim] = new MultiFab(faceba, a_dmap, 1, a_ngrow[1], MFInfo(), FArrayBoxFactory()); - m_facecent[idim] = new MultiFab(faceba, a_dmap, AMREX_SPACEDIM-1, a_ngrow[2], MFInfo(), FArrayBoxFactory()); - } + m_areafrac[idim] = new MultiFab(a_grids, a_dmap, 1, a_ngrow[1]+1, MFInfo(), FArrayBoxFactory()); + m_facecent[idim] = new MultiFab(a_grids, a_dmap, AMREX_SPACEDIM-1, a_ngrow[2], MFInfo(), FArrayBoxFactory()); } m_bndryarea = new MultiFab(my_grids, a_dmap, 1, a_ngrow[2], MFInfo(), FArrayBoxFactory()); @@ -848,23 +842,22 @@ define( [[maybe_unused]] int const& a_level, for (MFIter mfi(*m_cellflags, false); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - const Box& bx_grown = mfi.growntilebox(); - - Array4 const& aux_flag = m_cellflags->array(mfi); - Array4 const& aux_vfrac = m_volfrac->array(mfi); - Array4 const& aux_afrac_x = m_areafrac[0]->array(mfi); - Array4 const& aux_afrac_y = m_areafrac[1]->array(mfi); - Array4 const& aux_afrac_z = m_areafrac[2]->array(mfi); + const Box& bx = mfi.validbox(); + const Box& bx_grown = mfi.growntilebox(); - Array4 const& aux_vcent = m_volcent->array(mfi); - Array4 const& aux_fcent_x = m_facecent[0]->array(mfi); - Array4 const& aux_fcent_y = m_facecent[1]->array(mfi); - Array4 const& aux_fcent_z = m_facecent[2]->array(mfi); - Array4 const& aux_barea = m_bndryarea->array(mfi); - Array4 const& aux_bcent = m_bndrycent->array(mfi); - Array4 const& aux_bnorm = m_bndrynorm->array(mfi); + Array4 const& aux_flag = m_cellflags->array(mfi); + Array4 const& aux_vfrac = m_volfrac->array(mfi); + Array4 const& aux_afrac_x = m_areafrac[0]->array(mfi); + Array4 const& aux_afrac_y = 
m_areafrac[1]->array(mfi); + Array4 const& aux_afrac_z = m_areafrac[2]->array(mfi); + Array4 const& aux_vcent = m_volcent->array(mfi); + Array4 const& aux_fcent_x = m_facecent[0]->array(mfi); + Array4 const& aux_fcent_y = m_facecent[1]->array(mfi); + Array4 const& aux_fcent_z = m_facecent[2]->array(mfi); + Array4 const& aux_barea = m_bndryarea->array(mfi); + Array4 const& aux_bcent = m_bndrycent->array(mfi); + Array4 const& aux_bnorm = m_bndrynorm->array(mfi); if (FlagFab[mfi].getType(bx) == FabType::singlevalued ) { From 5d83c3389fa0ffb40c77e2eed5c7d8ec4316a596 Mon Sep 17 00:00:00 2001 From: "Aaron M. Lattanzi" <103702284+AMLattanzi@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:09:37 -0800 Subject: [PATCH 21/44] Fix upwind real bcs. (#2705) --- .../ERF_BoundaryConditionsRealbdy.cpp | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/Source/BoundaryConditions/ERF_BoundaryConditionsRealbdy.cpp b/Source/BoundaryConditions/ERF_BoundaryConditionsRealbdy.cpp index bb7d5a83ba..c81ad955a5 100644 --- a/Source/BoundaryConditions/ERF_BoundaryConditionsRealbdy.cpp +++ b/Source/BoundaryConditions/ERF_BoundaryConditionsRealbdy.cpp @@ -380,16 +380,27 @@ ERF::fill_from_realbdy_upwind (const Vector& mfs, const Array4& u_arr = mf_u.array(mfi); const Array4& v_arr = mf_v.array(mfi); + auto lb_u = lbound(u_arr); lb_u.x += ngvect_vels[0]; lb_u.y += ngvect_vels[1]; + auto ub_u = ubound(u_arr); ub_u.x -= ngvect_vels[0]; ub_u.y -= ngvect_vels[1]; + auto lb_v = lbound(v_arr); lb_v.x += ngvect_vels[0]; lb_v.y += ngvect_vels[1]; + auto ub_v = ubound(v_arr); ub_v.x -= ngvect_vels[0]; ub_v.y -= ngvect_vels[1]; + // NOTE: Xlo/hi boxes own corner cells (Ylo/hi) ParallelFor(bx_xlo, bx_xhi, [=] AMREX_GPU_DEVICE (int i, int j, int k) { + // Limit for BDY FAB data int ii = std::max(i , dom_lo.x); int jj = std::max(j , dom_lo.y); jj = std::min(jj, dom_hi.y); - if ( (u_arr(dom_lo.x,jj,k) >= 0.0) || - ((jj == dom_lo.y) && (v_arr(ii,dom_lo.y ,k) >= 0.0)) || - 
((jj == dom_hi.y) && (v_arr(ii,dom_hi.y+1,k) <= 0.0)) ) { + + // Limit for u_arr and v_arr + int ju = std::min(std::max(j, lb_u.y), ub_u.y); + int iv = std::min(std::max(i, lb_v.x), ub_v.x); + + if ( (u_arr(dom_lo.x,ju,k) >= 0.0) || + ((jj == dom_lo.y) && (v_arr(iv,dom_lo.y ,k) >= 0.0)) || + ((jj == dom_hi.y) && (v_arr(iv,dom_hi.y+1,k) <= 0.0)) ) { dest_arr(i,j,k,comp_idx) = oma * bdatxlo_n (ii,jj,k,0) + alpha * bdatxlo_np1(ii,jj,k,0); if (var_idx == Vars::cons) { @@ -401,12 +412,18 @@ ERF::fill_from_realbdy_upwind (const Vector& mfs, }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { + // Limit for BDY FAB data int ii = std::min(i , dom_hi.x); int jj = std::max(j , dom_lo.y); jj = std::min(jj, dom_hi.y); - if ( (u_arr(dom_hi.x+1,jj,k) <= 0.0) || - ((jj == dom_lo.y) && (v_arr(ii,dom_lo.y ,k) >= 0.0)) || - ((jj == dom_hi.y) && (v_arr(ii,dom_hi.y+1,k) <= 0.0)) ) { + + // Limit for u_arr and v_arr + int ju = std::min(std::max(j, lb_u.y), ub_u.y); + int iv = std::min(std::max(i, lb_v.x), ub_v.x); + + if ( (u_arr(dom_hi.x+1,ju,k) <= 0.0) || + ((jj == dom_lo.y) && (v_arr(iv,dom_lo.y ,k) >= 0.0)) || + ((jj == dom_hi.y) && (v_arr(iv,dom_hi.y+1,k) <= 0.0)) ) { dest_arr(i,j,k,comp_idx) = oma * bdatxhi_n (ii,jj,k,0) + alpha * bdatxhi_np1(ii,jj,k,0); if (var_idx == Vars::cons) { @@ -423,8 +440,13 @@ ERF::fill_from_realbdy_upwind (const Vector& mfs, ParallelFor(bx_ylo, bx_yhi, [=] AMREX_GPU_DEVICE (int i, int j, int k) { + // Limit for BDY FAB data int jj = std::max(j, dom_lo.y); - if (v_arr(i,dom_lo.y,k) >= 0.0) { + + // Limit for v_arr + int iv = std::min(std::max(i, lb_v.x), ub_v.x); + + if (v_arr(iv,dom_lo.y,k) >= 0.0) { dest_arr(i,j,k,comp_idx) = oma * bdatylo_n (i,jj,k,0) + alpha * bdatylo_np1(i,jj,k,0); if (var_idx == Vars::cons) { @@ -436,8 +458,13 @@ ERF::fill_from_realbdy_upwind (const Vector& mfs, }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { + // Limit for BDY FAB data int jj = std::min(j, dom_hi.y); - if (v_arr(i,dom_hi.y+1,k) <= 0.0) { + + // Limit for v_arr + int 
iv = std::min(std::max(i, lb_v.x), ub_v.x); + + if (v_arr(iv,dom_hi.y+1,k) <= 0.0) { dest_arr(i,j,k,comp_idx) = oma * bdatyhi_n (i,jj,k,0) + alpha * bdatyhi_np1(i,jj,k,0); if (var_idx == Vars::cons) { From fd0c030a8672c673dd4ffe01121fd79748356518 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Mon, 10 Nov 2025 15:56:51 -0800 Subject: [PATCH 22/44] Add pkg-config deps if needed for netcdf --- CMake/FindNetCDF.cmake | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CMake/FindNetCDF.cmake b/CMake/FindNetCDF.cmake index a2091a906f..f0ab7a9ca0 100644 --- a/CMake/FindNetCDF.cmake +++ b/CMake/FindNetCDF.cmake @@ -59,6 +59,22 @@ find_library(NETCDF_LIBRARIES_C NAMES netcdf $ENV{NETCDF_DIR}/lib) mark_as_advanced(NETCDF_LIBRARIES_C) +if(NETCDF_LIBRARIES_C) + # First check if pkg-config told us about dependencies + if(NETCDF_LINK_LIBRARIES) + # Use pkg-config's complete dependency list + set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) + message(STATUS "NetCDF dependencies from pkg-config: ${NETCDF_LINK_LIBRARIES}") + else() + # Fallback: try to find HDF5 manually + find_package(HDF5 QUIET COMPONENTS C HL) + if(HDF5_FOUND) + list(APPEND NETCDF_LIBRARIES_C ${HDF5_LIBRARIES}) + message(STATUS "Added HDF5 libraries to NetCDF") + endif() + endif() +endif() + set(NetCDF_has_interfaces "YES") # will be set to NO if we're missing any interfaces set(NetCDF_libs "${NETCDF_LIBRARIES_C}") From 274134c5fdd850fe817c9f7f7675f580ace23990 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Mon, 10 Nov 2025 16:47:41 -0800 Subject: [PATCH 23/44] Separate shoc script --- Build/cmake_with_kokkos_many.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Build/cmake_with_kokkos_many.sh b/Build/cmake_with_kokkos_many.sh index 1055fe7176..36c3d04704 100755 --- a/Build/cmake_with_kokkos_many.sh +++ b/Build/cmake_with_kokkos_many.sh @@ -10,7 +10,7 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ -DERF_ENABLE_NETCDF:BOOL=ON \ -DERF_ENABLE_HDF5:BOOL=ON \ -DERF_ENABLE_RRTMGP:BOOL=ON \ - -DERF_ENABLE_SHOC:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=OFF \ -DERF_ENABLE_MPI:BOOL=ON \ -DERF_ENABLE_CUDA:BOOL=ON \ -DERF_ENABLE_TESTS:BOOL=ON \ From 791c1e0eb62062fde922d1fe70fea3368df3fe43 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 04:42:44 -0800 Subject: [PATCH 24/44] Tweak FindNetCDF.cmake for other options --- CMake/FindNetCDF.cmake | 66 ++++++++++++++++++++++++++++++------------ CMakeLists.txt | 16 +++++----- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/CMake/FindNetCDF.cmake b/CMake/FindNetCDF.cmake index f0ab7a9ca0..89faabdcaa 100644 --- a/CMake/FindNetCDF.cmake +++ b/CMake/FindNetCDF.cmake @@ -20,7 +20,7 @@ if (NETCDF_INCLUDES AND NETCDF_LIBRARIES) set (NETCDF_FIND_QUIETLY TRUE) endif (NETCDF_INCLUDES AND NETCDF_LIBRARIES) -# Build hints from user variables first, then pkg-config +# Build hints from user variables first set(NETCDF_INCLUDE_HINTS) set(NETCDF_LIBRARY_HINTS) @@ -37,19 +37,33 @@ if(NETCDF_LIBRARY_DIR) list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIR}) endif() +# Use pkg-config to get hints set(ENV{PKG_CONFIG_PATH} "$ENV{MPICH_DIR}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") message(STATUS "PKG_CONFIG_PATH = $ENV{PKG_CONFIG_PATH}") -find_package(PkgConfig REQUIRED QUIET) -pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf) -if(NOT NETCDF_FOUND) - pkg_check_modules(NETCDF REQUIRED IMPORTED_TARGET netcdf-cxx4_parallel) -endif() +find_package(PkgConfig QUIET) +if(PKG_CONFIG_FOUND) + # 
Try multiple NetCDF variants in order of preference + pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf) + if(NOT NETCDF_FOUND) + pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf-mpi) + endif() + if(NOT NETCDF_FOUND) + pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf_parallel) + endif() + if(NOT NETCDF_FOUND) + pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf-cxx4_parallel) + endif() -# Add pkg-config results to hints -list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIRS}) -list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIRS}) + if(NETCDF_FOUND) + message(STATUS "Found NetCDF via pkg-config: ${NETCDF_MODULE_NAME}") + # Add pkg-config results to hints + list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIRS}) + list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIRS}) + endif() +endif() +# Try CMake's find_library using hints find_path(NETCDF_INCLUDES netcdf.h HINTS ${NETCDF_INCLUDE_HINTS} $ENV{NETCDF_DIR}/include) @@ -59,20 +73,34 @@ find_library(NETCDF_LIBRARIES_C NAMES netcdf $ENV{NETCDF_DIR}/lib) mark_as_advanced(NETCDF_LIBRARIES_C) +# If find_library succeeded, check if we need HDF5 if(NETCDF_LIBRARIES_C) - # First check if pkg-config told us about dependencies + # Only add HDF5 if pkg-config told us NetCDF needs it if(NETCDF_LINK_LIBRARIES) - # Use pkg-config's complete dependency list - set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) - message(STATUS "NetCDF dependencies from pkg-config: ${NETCDF_LINK_LIBRARIES}") - else() - # Fallback: try to find HDF5 manually - find_package(HDF5 QUIET COMPONENTS C HL) - if(HDF5_FOUND) - list(APPEND NETCDF_LIBRARIES_C ${HDF5_LIBRARIES}) - message(STATUS "Added HDF5 libraries to NetCDF") + # Check if pkg-config's library list includes hdf5 + string(FIND "${NETCDF_LINK_LIBRARIES}" "hdf5" HDF5_IN_NETCDF) + if(HDF5_IN_NETCDF GREATER -1) + message(STATUS "NetCDF was built with HDF5 support") + # Check if HDF5 was already found (e.g., by AMReX) + if(TARGET hdf5::hdf5 OR HDF5_FOUND) + list(APPEND 
NETCDF_LIBRARIES_C ${HDF5_LIBRARIES}) + message(STATUS " Using HDF5 libraries (already found): ${HDF5_LIBRARIES}") + else() + # Fallback: use pkg-config's complete library list which includes HDF5 + set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) + message(STATUS " HDF5 not already a target, using pkg-config's complete library list:") + message(STATUS " NETCDF_LIBRARIES_C = ${NETCDF_LINK_LIBRARIES}") + endif() + else() + message(STATUS "NetCDF was built without HDF5 support") endif() + else() + message(STATUS "No pkg-config information available; assuming NetCDF doesn't need HDF5") endif() +# FALLBACK: If find_library failed but pkg-config succeeded, use pkg-config's library list +elseif(NETCDF_FOUND AND NETCDF_LINK_LIBRARIES) + set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) + message(STATUS "Using NetCDF libraries from pkg-config: ${NETCDF_LINK_LIBRARIES}") endif() set(NetCDF_has_interfaces "YES") # will be set to NO if we're missing any interfaces diff --git a/CMakeLists.txt b/CMakeLists.txt index e716b4fee2..b41f01293f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,7 +207,7 @@ endif() if(ERF_ENABLE_MPI) # Check if we're on Cray with bare MPI wrappers (which will hang) set(SKIP_MPI_DETECTION FALSE) - + if(DEFINED ENV{CRAYPE_VERSION} OR DEFINED ENV{CRAY_MPICH_DIR}) # On Cray system - check if using problematic bare MPI wrappers if(CMAKE_CXX_COMPILER MATCHES "mpicxx" OR @@ -217,11 +217,11 @@ if(ERF_ENABLE_MPI) set(SKIP_MPI_DETECTION TRUE) endif() endif() - + if(SKIP_MPI_DETECTION) # Workaround: Manual MPI setup (avoids hang) message(STATUS "Manually configuring MPI (bypassing find_package)...") - + # Get Cray MPICH version for informational purposes set(MPICH_VERSION "UNKNOWN") if(DEFINED ENV{CRAY_MPICH_VERSION}) @@ -229,26 +229,26 @@ if(ERF_ENABLE_MPI) elseif(DEFINED ENV{CRAY_MPICH_VER}) set(MPICH_VERSION "$ENV{CRAY_MPICH_VER}") endif() - + # Cray MPICH 8.x supports MPI 3.1 standard set(MPI_VERSION "3.1") - + # Create MPI targets if(NOT TARGET 
MPI::MPI_CXX) add_library(MPI::MPI_CXX INTERFACE IMPORTED) endif() - + if(NOT TARGET MPI::MPI_C) add_library(MPI::MPI_C INTERFACE IMPORTED) endif() - + # Set MPI variables set(MPI_FOUND TRUE) set(MPI_CXX_FOUND TRUE) set(MPI_C_FOUND TRUE) set(MPI_C_VERSION "${MPI_VERSION}") set(MPI_CXX_VERSION "${MPI_VERSION}") - + message(STATUS " Cray MPICH implementation: ${MPICH_VERSION}") message(STATUS " MPI API standard: ${MPI_VERSION}") message(STATUS " Created MPI::MPI_CXX and MPI::MPI_C targets") From e4cfb699582a75149eb7172887e94f2175f2d4df Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 04:46:55 -0800 Subject: [PATCH 25/44] Style --- .../build_erf_with_shoc_cuda_Perlmutter.sh | 20 ++++++------- .../cmake_with_shoc_netcdf_perlmutter.sh | 2 +- Build/setup_cmake_validation.sh | 30 +++++++++---------- Build/wrapper_clean_build.sh | 24 +++++++-------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh index 448cfb47c1..1d577bd56e 100755 --- a/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh +++ b/Build/Perlmutter/build_erf_with_shoc_cuda_Perlmutter.sh @@ -5,33 +5,33 @@ set -o pipefail # Function to verify if a directory is the ERF repo root verify_erf_dir() { local dir=$1 - + # Check for basic structure if [ ! -f "$dir/CMakeLists.txt" ] || [ ! 
-d "$dir/Source" ]; then return 1 fi - + # Check for "Energy Research and Forecasting" in key files local found=0 - + if [ -f "$dir/README.rst" ]; then if grep -q "Energy Research and Forecasting" "$dir/README.rst" 2>/dev/null; then found=1 fi fi - + if [ $found -eq 0 ] && [ -f "$dir/LICENSE.md" ]; then if grep -q "Energy Research and Forecasting" "$dir/LICENSE.md" 2>/dev/null; then found=1 fi fi - + if [ $found -eq 0 ] && [ -f "$dir/CITATION.cff" ]; then if grep -q "Energy Research and Forecasting" "$dir/CITATION.cff" 2>/dev/null; then found=1 fi fi - + return $((1 - found)) } @@ -48,7 +48,7 @@ find_erf_dir() { fi fi fi - + # Method 2: Try going up from script location local script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Script is in Build/Perlmutter/, so go up 2 levels @@ -58,14 +58,14 @@ find_erf_dir() { echo "Detected ERF_DIR from script location: $ERF_DIR" return 0 fi - + # Method 3: Check current directory if verify_erf_dir "$PWD"; then ERF_DIR="$PWD" echo "Detected ERF_DIR from current directory: $ERF_DIR" return 0 fi - + echo "Error: Could not auto-detect ERF_DIR" echo "Verification requires:" echo " - CMakeLists.txt and Source/ directory" @@ -75,7 +75,7 @@ find_erf_dir() { ################################################################################### -# 1. Resolve ERF_DIR +# 1. Resolve ERF_DIR # Detect ERF_DIR if ! 
find_erf_dir; then exit 1 diff --git a/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh b/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh index 7e04de356f..7f9fb88262 100644 --- a/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh +++ b/Build/Perlmutter/cmake_with_shoc_netcdf_perlmutter.sh @@ -3,7 +3,7 @@ # Load the needed modules module load gcc-native cmake cray-mpich cray-libsci cray-hdf5-parallel cray-netcdf-hdf5parallel -# Deactive GPU aware MPI for CPU build +# Deactivate GPU aware MPI for CPU build export MPICH_GPU_SUPPORT_ENABLED=0 export CRAY_ACCEL_TARGET=none diff --git a/Build/setup_cmake_validation.sh b/Build/setup_cmake_validation.sh index 2c534e13b7..48b0949147 100755 --- a/Build/setup_cmake_validation.sh +++ b/Build/setup_cmake_validation.sh @@ -6,33 +6,33 @@ set -o pipefail # Function to verify if a directory is the ERF repo root verify_erf_dir() { local dir=$1 - + # Check for basic structure if [ ! -f "$dir/CMakeLists.txt" ] || [ ! -d "$dir/Source" ]; then return 1 fi - + # Check for "Energy Research and Forecasting" in key files local found=0 - + if [ -f "$dir/README.rst" ]; then if grep -q "Energy Research and Forecasting" "$dir/README.rst" 2>/dev/null || true; then found=1 fi fi - + if [ $found -eq 0 ] && [ -f "$dir/LICENSE.md" ]; then if grep -q "Energy Research and Forecasting" "$dir/LICENSE.md" 2>/dev/null || true; then found=1 fi fi - + if [ $found -eq 0 ] && [ -f "$dir/CITATION.cff" ]; then if grep -q "Energy Research and Forecasting" "$dir/CITATION.cff" 2>/dev/null || true; then found=1 fi fi - + return $((1 - found)) } @@ -47,7 +47,7 @@ find_erf_dir() { return 0 fi fi - + # Method 2: Use git to find repo root if command -v git &> /dev/null; then if git rev-parse --is-inside-work-tree &> /dev/null 2>&1; then @@ -59,7 +59,7 @@ find_erf_dir() { fi fi fi - + # Method 3: Try going up from script location local script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Check if script is in Build/ directory @@ -71,14 +71,14 
@@ find_erf_dir() { return 0 fi fi - + # Method 4: Check current directory if verify_erf_dir "$PWD"; then ERF_DIR="$PWD" echo "Detected ERF_DIR from current directory: $ERF_DIR" return 0 fi - + return 1 } @@ -177,22 +177,22 @@ for script in "$SRC_DIR"/*.sh; do if [ ! -f "$script" ]; then continue fi - + basename_script=$(basename "$script") - + # Skip backup files if [[ "$basename_script" =~ ~$ ]]; then SKIPPED=$((SKIPPED + 1)) continue fi - + # Check if it's an ERF cmake script (contains DERF or cmake) has_derf=0 has_cmake=0 - + grep -q "DERF" "$script" 2>/dev/null && has_derf=1 grep -q "cmake" "$script" 2>/dev/null && has_cmake=1 - + if [ $has_derf -eq 1 ] || [ $has_cmake -eq 1 ]; then cp "$script" "$BUILD_DIR/" chmod +x "$BUILD_DIR/$basename_script" diff --git a/Build/wrapper_clean_build.sh b/Build/wrapper_clean_build.sh index fb53a06427..4077234ad8 100755 --- a/Build/wrapper_clean_build.sh +++ b/Build/wrapper_clean_build.sh @@ -62,7 +62,7 @@ set -e # CLEANUP BEHAVIOR (GNU Make Standard): # ------------------------------------- # This script performs a 'distclean' equivalent operation, which per GNU -# standards means: "Delete all files in the current directory (or created +# standards means: "Delete all files in the current directory (or created # by this makefile) that are created by configuring or building the program." # # For CMake, this includes: @@ -126,7 +126,7 @@ done # Generated project config [ -f "ERFConfig.cmake" ] && FILES_TO_DELETE="$FILES_TO_DELETE ERFConfig.cmake" -# pkg-config files +# pkg-config files find . 
-maxdepth 1 -name "*.pc" -type f 2>/dev/null | while read -r f; do FILES_TO_DELETE="$FILES_TO_DELETE $f" done @@ -146,11 +146,11 @@ INSTALL_DIR="" if [ -f "CMakeCache.txt" ]; then # Extract CMAKE_INSTALL_PREFIX from cache INSTALL_PREFIX=$(grep "^CMAKE_INSTALL_PREFIX:" CMakeCache.txt | cut -d'=' -f2) - + if [ -n "$INSTALL_PREFIX" ] && [ -d "$INSTALL_PREFIX" ]; then # Convert to absolute path for comparison INSTALL_DIR=$(cd "$INSTALL_PREFIX" 2>/dev/null && pwd || echo "$INSTALL_PREFIX") - + # Check if it's a subdirectory of current directory (local install) CURRENT_DIR=$(pwd) if [[ "$INSTALL_DIR" == "$CURRENT_DIR"/* ]]; then @@ -158,7 +158,7 @@ if [ -f "CMakeCache.txt" ]; then INSTALL_DIR_RELATIVE=$(realpath --relative-to="$CURRENT_DIR" "$INSTALL_DIR" 2>/dev/null || \ python3 -c "import os.path; print(os.path.relpath('$INSTALL_DIR', '$CURRENT_DIR'))" 2>/dev/null || \ echo "$INSTALL_DIR") - + echo "" echo "==========================================" echo "Install Directory Detected" @@ -169,7 +169,7 @@ if [ -f "CMakeCache.txt" ]; then echo "This directory contains installed artifacts and is separate" echo "from the build configuration (distclean does NOT remove it)." echo "" - + if [ -d "$INSTALL_DIR" ]; then read -p "Also remove install directory? [y/N] " -n 1 -r echo @@ -210,11 +210,11 @@ else echo "Note: Install directories (if any) will NOT be deleted." echo " Use 'cmake --install --prefix ...' to manage installations." echo "" - + # Prompt user read -p "Delete these files/directories? [y/N] " -n 1 -r echo - + if [[ ! $REPLY =~ ^[Yy]$ ]]; then echo "Aborted by user. Not deleting anything." echo "" @@ -227,7 +227,7 @@ else echo " 3. Install: cmake --install --prefix " exit 1 fi - + # Actually delete echo "" echo "Performing distclean..." @@ -241,9 +241,9 @@ else # Clean install directory if requested if [ -n "$CLEAN_INSTALL_DIR" ]; then - echo "" - echo "Removing install directory..." 
- rm -rf "$CLEAN_INSTALL_DIR" && echo " ✓ Deleted: $CLEAN_INSTALL_DIR" + echo "" + echo "Removing install directory..." + rm -rf "$CLEAN_INSTALL_DIR" && echo " ✓ Deleted: $CLEAN_INSTALL_DIR" fi echo "" From 448dbc6bd458f9a60df5791861bf9aad2f4f22e9 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 05:07:56 -0800 Subject: [PATCH 26/44] Cleanup chars --- Build/setup_cmake_validation.sh | 6 +++--- Build/wrapper_clean_build.sh | 6 +++--- Build/wrapper_clean_build_auto.sh | 12 ++++++------ CMake/CrayDetection.cmake | 16 ++++++++-------- CMake/UtilityTargets.cmake | 2 +- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Build/setup_cmake_validation.sh b/Build/setup_cmake_validation.sh index 48b0949147..db40ed3d25 100755 --- a/Build/setup_cmake_validation.sh +++ b/Build/setup_cmake_validation.sh @@ -196,10 +196,10 @@ for script in "$SRC_DIR"/*.sh; do if [ $has_derf -eq 1 ] || [ $has_cmake -eq 1 ]; then cp "$script" "$BUILD_DIR/" chmod +x "$BUILD_DIR/$basename_script" - echo " ✓ $basename_script" + echo " DONE: $basename_script" COPIED=$((COPIED + 1)) else - echo " ✗ $basename_script (no DERF or cmake found)" + echo " ERROR: $basename_script (no DERF or cmake found)" SKIPPED=$((SKIPPED + 1)) fi done @@ -258,7 +258,7 @@ if [ $# -ne 1 ]; then for i in "${!SCRIPTS[@]}"; do script_base="${SCRIPTS[$i]%.sh}" printf "%3d: %s\n" $((i+1)) "${SCRIPTS[$i]}" - printf " → subdirectory: %s/script_%s/\n" "$ERF_DIR" "$script_base" + printf " -> subdirectory: %s/script_%s/\n" "$ERF_DIR" "$script_base" done echo "" echo "Each script will run in its own clean subdirectory at ERF root." diff --git a/Build/wrapper_clean_build.sh b/Build/wrapper_clean_build.sh index 4077234ad8..8dec278538 100755 --- a/Build/wrapper_clean_build.sh +++ b/Build/wrapper_clean_build.sh @@ -233,9 +233,9 @@ else echo "Performing distclean..." 
for f in $FILES_TO_DELETE; do if [ -d "$f" ]; then - rm -rf "$f" && echo " ✓ Deleted directory: $f" + rm -rf "$f" && echo " DONE: Deleted directory: $f" elif [ -f "$f" ]; then - rm -f "$f" && echo " ✓ Deleted file: $f" + rm -f "$f" && echo " DONE: Deleted file: $f" fi done @@ -243,7 +243,7 @@ else if [ -n "$CLEAN_INSTALL_DIR" ]; then echo "" echo "Removing install directory..." - rm -rf "$CLEAN_INSTALL_DIR" && echo " ✓ Deleted: $CLEAN_INSTALL_DIR" + rm -rf "$CLEAN_INSTALL_DIR" && echo " DONE: Deleted: $CLEAN_INSTALL_DIR" fi echo "" diff --git a/Build/wrapper_clean_build_auto.sh b/Build/wrapper_clean_build_auto.sh index 3e024a23c6..df0b483a7e 100755 --- a/Build/wrapper_clean_build_auto.sh +++ b/Build/wrapper_clean_build_auto.sh @@ -68,23 +68,23 @@ rm -f lib*.a lib*.so 2>/dev/null || true # Delete build logs rm -f build_*.log git-state.txt 2>/dev/null || true -echo "✓ Cleaned: CMake configuration files" -echo "✓ Cleaned: Build artifacts (Exec/, Submodules/, Tests/, bin/, etc.)" -echo "✓ Cleaned: Libraries (lib*.a, lib*.so)" -echo "✓ Cleaned: Build logs and editor backups" +echo " DONE: Cleaned: CMake configuration files" +echo " DONE: Cleaned: Build artifacts (Exec/, Submodules/, Tests/, bin/, etc.)" +echo " DONE: Cleaned: Libraries (lib*.a, lib*.so)" +echo " DONE: Cleaned: Build logs and editor backups" # Clean install directory if requested if [ -n "$INSTALL_DIR" ]; then echo "" echo "Cleaning install directory (CLEAN_INSTALL=yes): $INSTALL_DIR" - rm -rf "$INSTALL_DIR" && echo "✓ Deleted: $INSTALL_DIR" + rm -rf "$INSTALL_DIR" && echo " DONE: Deleted: $INSTALL_DIR" elif [ -f "CMakeCache.txt.deleted" ]; then echo "" echo "Install directory NOT cleaned (use CLEAN_INSTALL=yes to clean)" fi echo "" -echo "✓ Directory ready for fresh configuration" +echo " DONE: Directory ready for fresh configuration" echo "" # Set ERF_DIR diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 4ee267d29f..4e36009f0a 100644 --- a/CMake/CrayDetection.cmake +++ 
b/CMake/CrayDetection.cmake @@ -98,7 +98,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") " CC --version\n" "") else() - message(STATUS " GCC version ${CMAKE_CXX_COMPILER_VERSION} >= 8.0 ✓") + message(STATUS " GCC version ${CMAKE_CXX_COMPILER_VERSION} >= 8.0") erf_cray_verbose("GCC version sufficient for C++17 ") endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") @@ -321,7 +321,7 @@ if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) erf_cray_verbose(" - Incorrect compiler wrapper handling") erf_cray_verbose(" - Missing Cray-specific find modules") else() - message(STATUS " CMake version ${CMAKE_VERSION} >= ${ERF_RECOMMENDED_CMAKE_VERSION} ✓") + message(STATUS " CMake version ${CMAKE_VERSION} >= ${ERF_RECOMMENDED_CMAKE_VERSION}") erf_cray_verbose("CMake version check passed") endif() @@ -338,14 +338,14 @@ if(ERF_ENABLE_CUDA) # Check for CUDA_HOME (set by cudatoolkit module) if(DEFINED ENV{CUDA_HOME}) - message(STATUS " CUDA_HOME = $ENV{CUDA_HOME} ✓") + message(STATUS " CUDA_HOME = $ENV{CUDA_HOME}") set(CUDA_TOOLKIT_LOADED TRUE) erf_cray_verbose("CUDA toolkit appears to be loaded (CUDA_HOME set)") endif() # Additional check for CUDATOOLKIT_HOME (alternative Cray variable) if(DEFINED ENV{CUDATOOLKIT_HOME}) - message(STATUS " CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME} ✓") + message(STATUS " CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") set(CUDA_TOOLKIT_LOADED TRUE) erf_cray_verbose("CUDA toolkit appears to be loaded (CUDATOOLKIT_HOME set)") endif() @@ -353,7 +353,7 @@ if(ERF_ENABLE_CUDA) # Check for nvcc in PATH find_program(NVCC_EXECUTABLE nvcc) if(NVCC_EXECUTABLE) - message(STATUS " Found nvcc: ${NVCC_EXECUTABLE} ✓") + message(STATUS " Found nvcc: ${NVCC_EXECUTABLE}") set(CUDA_TOOLKIT_LOADED TRUE) erf_cray_verbose("nvcc found in PATH") endif() @@ -395,9 +395,9 @@ if(ERF_ENABLE_CUDA) erf_cray_verbose("AMReX_CUDA_ARCH not set (will use CMake default)") else() if(AMReX_CUDA_ARCH) - message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH} ✓") + message(STATUS " 
AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH}") else() - message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH} ✓") + message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH}") endif() endif() else() @@ -414,7 +414,7 @@ if(ERF_ENABLE_NETCDF) set(NETCDF_LOADED FALSE) if(DEFINED ENV{NETCDF_DIR}) - message(STATUS " NETCDF_DIR = $ENV{NETCDF_DIR} ✓") + message(STATUS " NETCDF_DIR = $ENV{NETCDF_DIR}") set(NETCDF_LOADED TRUE) endif() diff --git a/CMake/UtilityTargets.cmake b/CMake/UtilityTargets.cmake index 1664a3a87e..9fc820d5f1 100644 --- a/CMake/UtilityTargets.cmake +++ b/CMake/UtilityTargets.cmake @@ -73,7 +73,7 @@ add_custom_target(distclean # Summary COMMAND ${CMAKE_COMMAND} -E echo "" - COMMAND ${CMAKE_COMMAND} -E echo "✓ Distclean complete" + COMMAND ${CMAKE_COMMAND} -E echo " DONE: Distclean complete" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Note: Install directories preserved" From 86c69fb8b8119daa9fd087587fb53a1b025e7f63 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 09:04:15 -0500 Subject: [PATCH 27/44] Add rocm detection --- CMake/CrayDetection.cmake | 373 ++++++++++++++++++++++++-------------- 1 file changed, 240 insertions(+), 133 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 4e36009f0a..50465b24fd 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -202,91 +202,183 @@ if(ERF_ENABLE_CUDA) message(WARNING "") endif() - # ------------------------------------------------------------------------- - # Detect Kokkos architecture (for EKAT builds) - # Priority: CMake var > KOKKOS_GPU_ARCH env > CRAY_ACCEL_TARGET - # ------------------------------------------------------------------------- +endif() + +# ----------------------------------------------------------------------------- +# Detect AMReX AMD architecture (for HIP builds) +# Priority: CMake var > AMREX_AMD_ARCH env > CMAKE_AMD_ARCH env > CRAY_ACCEL_TARGET +# 
----------------------------------------------------------------------------- + +if(AMReX_GPU_BACKEND MATCHES "HIP" OR ERF_ENABLE_HIP) + message(STATUS "") + message(STATUS "ERF: Checking HIP/ROCm compiler configuration...") - if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) - message(STATUS "") - message(STATUS " EKAT-based physics enabled, checking Kokkos architecture...") + if(AMReX_AMD_ARCH) + message(STATUS " AMReX_AMD_ARCH = ${AMReX_AMD_ARCH} (user specified)") + erf_cray_verbose("AMReX AMD arch set via CMake variable") + + elseif(DEFINED ENV{AMREX_AMD_ARCH}) + set(AMReX_AMD_ARCH "$ENV{AMREX_AMD_ARCH}" CACHE STRING "AMD arch from AMREX_AMD_ARCH") + message(STATUS " AMReX_AMD_ARCH = $ENV{AMREX_AMD_ARCH} (from AMREX_AMD_ARCH)") + erf_cray_verbose("AMReX AMD arch from AMREX_AMD_ARCH environment variable") + + elseif(DEFINED ENV{CMAKE_AMD_ARCH}) + set(AMReX_AMD_ARCH "$ENV{CMAKE_AMD_ARCH}" CACHE STRING "AMD arch from CMAKE_AMD_ARCH") + message(STATUS " AMReX_AMD_ARCH = $ENV{CMAKE_AMD_ARCH} (from CMAKE_AMD_ARCH)") + erf_cray_verbose("AMReX AMD arch from CMAKE_AMD_ARCH environment variable") + + elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) + # Auto-detect from Cray accelerator module + set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + message(STATUS " Detected CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + + if(CRAY_ACCEL_TARGET STREQUAL "amd_gfx90a") + set(AMReX_AMD_ARCH "gfx90a" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_AMD_ARCH = gfx90a (MI200 from CRAY_ACCEL_TARGET)") + elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx908") + set(AMReX_AMD_ARCH "gfx908" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_AMD_ARCH = gfx908 (MI100 from CRAY_ACCEL_TARGET)") + elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx942") + set(AMReX_AMD_ARCH "gfx942" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") + message(STATUS " AMReX_AMD_ARCH = gfx942 (MI300 from CRAY_ACCEL_TARGET)") + else() + message(WARNING "ERF: Unknown CRAY_ACCEL_TARGET = 
${CRAY_ACCEL_TARGET}") + endif() + erf_cray_verbose("AMReX AMD arch from CRAY_ACCEL_TARGET module variable") + else() + message(WARNING "") + message(WARNING "ERF: AMReX_AMD_ARCH not detected") + message(WARNING " For Frontier: module load craype-accel-amd-gfx90a") + message(WARNING " Or set: export CMAKE_AMD_ARCH=gfx90a") + message(WARNING " Or set: -DAMReX_AMD_ARCH=gfx90a") + message(WARNING "") + endif() + +endif() +# ------------------------------------------------------------------------- +# Detect Kokkos architecture (for EKAT builds) +# Priority: CMake var > KOKKOS_GPU_ARCH env > CRAY_ACCEL_TARGET +# ------------------------------------------------------------------------- + +if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) + message(STATUS "") + message(STATUS " EKAT-based physics enabled, checking Kokkos architecture...") + + # Check if user already set Kokkos_ARCH_* via CMake + set(KOKKOS_ARCH_SET FALSE) + + # Check for CUDA architectures + if(Kokkos_ARCH_VOLTA70 OR Kokkos_ARCH_AMPERE80 OR Kokkos_ARCH_HOPPER90) + set(KOKKOS_ARCH_SET TRUE) + message(STATUS " Kokkos CUDA arch already set by user") + erf_cray_verbose("User specified Kokkos CUDA architecture via CMake variable") + + # Check for AMD architectures + elseif(Kokkos_ARCH_VEGA90A OR Kokkos_ARCH_VEGA908 OR Kokkos_ARCH_MI300A) + set(KOKKOS_ARCH_SET TRUE) + message(STATUS " Kokkos AMD arch already set by user") + erf_cray_verbose("User specified Kokkos AMD architecture via CMake variable") + + elseif(DEFINED ENV{KOKKOS_GPU_ARCH}) + # Detect from KOKKOS_GPU_ARCH environment variable (build scripts) + set(KOKKOS_GPU_ARCH_ENV "$ENV{KOKKOS_GPU_ARCH}") + message(STATUS " Detected KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") - # Check if user already set Kokkos_ARCH_* via CMake - set(KOKKOS_ARCH_SET FALSE) - if(Kokkos_ARCH_VOLTA70 OR Kokkos_ARCH_AMPERE80 OR Kokkos_ARCH_HOPPER90) + # Map NVIDIA architectures + if(KOKKOS_GPU_ARCH_ENV STREQUAL "VOLTA70") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos 
arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON") set(KOKKOS_ARCH_SET TRUE) - message(STATUS " Kokkos_ARCH_* already set by user") - erf_cray_verbose("User specified Kokkos architecture via CMake variable") + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VOLTA70 -> Kokkos_ARCH_VOLTA70=ON") - elseif(DEFINED ENV{KOKKOS_GPU_ARCH}) - # Detect from KOKKOS_GPU_ARCH environment variable (build scripts) - set(KOKKOS_GPU_ARCH_ENV "$ENV{KOKKOS_GPU_ARCH}") - message(STATUS " Detected KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "AMPERE80") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=AMPERE80 -> Kokkos_ARCH_AMPERE80=ON") - # Map to Kokkos_ARCH_* CMake variable - if(KOKKOS_GPU_ARCH_ENV STREQUAL "VOLTA70") - set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VOLTA70 -> Kokkos_ARCH_VOLTA70=ON") - - elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "AMPERE80") - set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=AMPERE80 -> Kokkos_ARCH_AMPERE80=ON") - - elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "HOPPER90") - set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=HOPPER90 -> Kokkos_ARCH_HOPPER90=ON") - - else() - message(WARNING "ERF: Unknown KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") - message(WARNING " Expected: VOLTA70, AMPERE80, or HOPPER90") - endif() + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "HOPPER90") + set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from 
KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=HOPPER90 -> Kokkos_ARCH_HOPPER90=ON") + + # Map AMD architectures + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "VEGA90A") + set(Kokkos_ARCH_VEGA90A ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_VEGA90A = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VEGA90A -> Kokkos_ARCH_VEGA90A=ON") - elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) - # Fall back to CRAY_ACCEL_TARGET (set by 'module load gpu') - set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "VEGA908") + set(Kokkos_ARCH_VEGA908 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") + message(STATUS " Set Kokkos_ARCH_VEGA908 = ON") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VEGA908 -> Kokkos_ARCH_VEGA908=ON") - if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") - set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON (from CRAY_ACCEL_TARGET)") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia70 -> Kokkos_ARCH_VOLTA70=ON") - - elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") - set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON (from CRAY_ACCEL_TARGET)") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia80 -> Kokkos_ARCH_AMPERE80=ON") - - elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") - set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON (from CRAY_ACCEL_TARGET)") - set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia90 -> Kokkos_ARCH_HOPPER90=ON") - endif() + else() + message(WARNING "ERF: Unknown KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") + message(WARNING " Expected: VOLTA70, 
AMPERE80, HOPPER90, VEGA90A, or VEGA908") endif() - if(NOT KOKKOS_ARCH_SET) - message(WARNING "") - message(WARNING "ERF: Kokkos architecture not detected") - message(WARNING " For Perlmutter: module load gpu") - message(WARNING " Or set: export KOKKOS_GPU_ARCH=AMPERE80") - message(WARNING " Or set: -DKokkos_ARCH_AMPERE80=ON") - message(WARNING "") - else() - message(STATUS "") - message(STATUS " Note: After Kokkos configures, CMAKE_CUDA_ARCHITECTURES") - message(STATUS " will be set from Kokkos_CUDA_ARCHITECTURES") - erf_cray_verbose("Kokkos will set CMAKE_CUDA_ARCHITECTURES when CUDA language is enabled") + elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) + # Fall back to CRAY_ACCEL_TARGET (set by 'module load gpu' or 'module load craype-accel-*') + set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + + # NVIDIA targets + if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia70 -> Kokkos_ARCH_VOLTA70=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia80 -> Kokkos_ARCH_AMPERE80=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") + set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia90 -> Kokkos_ARCH_HOPPER90=ON") + + # AMD targets + elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx90a") + set(Kokkos_ARCH_VEGA90A ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_VEGA90A = ON (from CRAY_ACCEL_TARGET)") + 
set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx90a -> Kokkos_ARCH_VEGA90A=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx908") + set(Kokkos_ARCH_VEGA908 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_VEGA908 = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx908 -> Kokkos_ARCH_VEGA908=ON") + + elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx942") + set(Kokkos_ARCH_MI300A ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") + message(STATUS " Set Kokkos_ARCH_MI300A = ON (from CRAY_ACCEL_TARGET)") + set(KOKKOS_ARCH_SET TRUE) + erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx942 -> Kokkos_ARCH_MI300A=ON") endif() endif() - message(STATUS "") + if(NOT KOKKOS_ARCH_SET) + message(WARNING "") + message(WARNING "ERF: Kokkos architecture not detected") + message(WARNING " For Perlmutter: module load gpu") + message(WARNING " For Frontier: module load craype-accel-amd-gfx90a") + message(WARNING " Or set: export KOKKOS_GPU_ARCH=AMPERE80 (or VEGA90A)") + message(WARNING " Or set: -DKokkos_ARCH_AMPERE80=ON (or -DKokkos_ARCH_VEGA90A=ON)") + message(WARNING "") + else() + message(STATUS "") + message(STATUS " Note: After Kokkos configures, CMAKE_CUDA_ARCHITECTURES") + message(STATUS " will be set from Kokkos_CUDA_ARCHITECTURES") + erf_cray_verbose("Kokkos will set CMAKE_CUDA_ARCHITECTURES when CUDA language is enabled") + endif() endif() # ============================================================================== @@ -624,70 +716,85 @@ endif() # which enables GPU Transfer Library for direct GPU-GPU communication # SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags -# ============================================================================== -# Fix 4: GPU-aware MPI with Cray GTL (Checklist Item 4) -# ============================================================================== -# PROBLEM: GPU-aware MPI on Cray requires linking 
against mpi_gtl_cuda library -# which enables GPU Transfer Library for direct GPU-GPU communication -# SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags - -if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") - message(STATUS "ERF: [Fix 4] Applying GPU-aware MPI fix (Cray GTL)") - - erf_cray_verbose("Problem: GPU-aware MPI needs Cray GTL libraries") - erf_cray_verbose("Condition: CUDA + MPI + MPICH_GPU_SUPPORT_ENABLED=1") - erf_cray_verbose("Solution: Add -lmpi_gnu_123 -lmpi_gtl_cuda to link flags") - erf_cray_verbose("MPICH_GPU_SUPPORT_ENABLED = $ENV{MPICH_GPU_SUPPORT_ENABLED}") - - # Set the MPI+GTL libraries - # Note: We use -lmpi_gnu_123 explicitly because Cray's --as-needed can drop it - set(CRAY_MPI_LIBS "-lmpi_gnu_123 -lmpi_gtl_cuda") - - # Try to verify the library exists (for diagnostics) - set(MPI_LIB_SEARCH_PATHS "") - if(DEFINED ENV{MPICH_DIR}) - list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") +if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") + set(APPLY_FIX4 FALSE) + set(GPU_TYPE "") + set(GTL_LIB "") + set(MPI_BASE_LIB "mpi_cray") + + # Determine GPU type and GTL library + if(ERF_ENABLE_CUDA) + set(APPLY_FIX4 TRUE) + set(GPU_TYPE "CUDA") + set(GTL_LIB "mpi_gtl_cuda") + elseif(AMReX_GPU_BACKEND MATCHES "HIP") + set(APPLY_FIX4 TRUE) + set(GPU_TYPE "HIP/ROCm") + set(GTL_LIB "mpi_gtl_hsa") endif() - if(DEFINED ENV{CRAY_MPICH_DIR}) - list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") - endif() - - erf_cray_verbose("Searching for mpi_gtl_cuda library in:") - foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) - erf_cray_verbose(" ${path}") - endforeach() - find_library(CRAY_MPI_GTL_CUDA - NAMES mpi_gtl_cuda - HINTS ${MPI_LIB_SEARCH_PATHS} - NO_DEFAULT_PATH - ) - - if(CRAY_MPI_GTL_CUDA) - message(STATUS " Found GTL library: ${CRAY_MPI_GTL_CUDA}") - erf_cray_verbose("Library verification successful") - else() - message(STATUS " GTL library not found via find_library (will 
rely on linker search)") - erf_cray_verbose("Library not found in search paths, but linker may still find it") - erf_cray_verbose("This is normal if libraries are in non-standard Cray locations") - endif() - - # Apply the fix regardless of whether find_library succeeded - # The Cray linker knows where to find these libraries - message(STATUS " Adding MPI+GTL libraries: ${CRAY_MPI_LIBS}") - erf_cray_verbose("Adding to CMAKE_CUDA_STANDARD_LIBRARIES") - erf_cray_verbose("Adding to CMAKE_CXX_STANDARD_LIBRARIES") - - set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) - set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) + if(APPLY_FIX4) + message(STATUS "ERF: [Fix 4] Applying GPU-aware MPI fix (Cray GTL for ${GPU_TYPE})") + + erf_cray_verbose("Problem: GPU-aware MPI needs Cray GTL libraries") + erf_cray_verbose("Condition: ${GPU_TYPE} + MPI + MPICH_GPU_SUPPORT_ENABLED=1") + erf_cray_verbose("Solution: Add -l${MPI_BASE_LIB} -l${GTL_LIB} to link flags") + erf_cray_verbose("MPICH_GPU_SUPPORT_ENABLED = $ENV{MPICH_GPU_SUPPORT_ENABLED}") + + # Set the MPI+GTL libraries + set(CRAY_MPI_LIBS "-l${MPI_BASE_LIB} -l${GTL_LIB}") + + # Try to verify the library exists (for diagnostics) + set(MPI_LIB_SEARCH_PATHS "") + if(DEFINED ENV{MPICH_DIR}) + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") + endif() + if(DEFINED ENV{CRAY_MPICH_DIR}) + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") + endif() + + erf_cray_verbose("Searching for ${GTL_LIB} library in:") + foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) + erf_cray_verbose(" ${path}") + endforeach() + + find_library(CRAY_MPI_GTL_LIB + NAMES ${GTL_LIB} + HINTS ${MPI_LIB_SEARCH_PATHS} + NO_DEFAULT_PATH + ) + + if(CRAY_MPI_GTL_LIB) + message(STATUS " Found GTL library: ${CRAY_MPI_GTL_LIB}") + erf_cray_verbose("Library verification successful") + else() + message(STATUS " GTL library not found via 
find_library (will rely on linker search)") + erf_cray_verbose("Library not found in search paths, but linker may still find it") + erf_cray_verbose("This is normal if libraries are in non-standard Cray locations") + endif() - erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") - erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") + # Apply the fix regardless of whether find_library succeeded + # The Cray linker knows where to find these libraries + message(STATUS " Adding MPI+GTL libraries: ${CRAY_MPI_LIBS}") + erf_cray_verbose("Adding to CMAKE_*_STANDARD_LIBRARIES") + + if(ERF_ENABLE_CUDA) + set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + else() + set(CMAKE_HIP_STANDARD_LIBRARIES "${CMAKE_HIP_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + erf_cray_verbose("CMAKE_HIP_STANDARD_LIBRARIES: ${CMAKE_HIP_STANDARD_LIBRARIES}") + endif() + + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") + endif() else() - if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI) + if(ERF_ENABLE_MPI AND (ERF_ENABLE_CUDA OR AMReX_GPU_BACKEND MATCHES "HIP")) if(NOT DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) message(STATUS "") message(STATUS " Note: MPICH_GPU_SUPPORT_ENABLED not set") @@ -699,7 +806,7 @@ else() erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED=$ENV{MPICH_GPU_SUPPORT_ENABLED} (not '1')") endif() else() - erf_cray_verbose("Fix 4 not needed (CUDA+MPI not both enabled)") + erf_cray_verbose("Fix 4 not needed (GPU+MPI not both enabled)") endif() endif() From a84a34dbaf81394793ab64eeab5a222c56ecd25a Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Tue, 11 Nov 2025 09:23:00 -0500 Subject: [PATCH 28/44] More general mpi library name detection --- CMake/CrayDetection.cmake | 99 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 50465b24fd..0328079110 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -720,7 +720,104 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") set(APPLY_FIX4 FALSE) set(GPU_TYPE "") set(GTL_LIB "") - set(MPI_BASE_LIB "mpi_cray") + + # Detect which MPI library variant to use + set(MPI_BASE_LIB "") # Will be determined + + # Try 1: Use pkg-config with Cray compiler wrapper path (for Cray systems) + find_package(PkgConfig QUIET) + if(PkgConfig_FOUND) + # On Cray systems, get pkg-config path from compiler wrapper + execute_process( + COMMAND CC --cray-print-opts=pkg_config_path + OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE CC_RESULT + ) + + if(CC_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) + erf_cray_verbose("Found PKG_CONFIG_PATH from CC wrapper: ${CRAY_PKG_CONFIG_PATH}") + # Temporarily prepend to PKG_CONFIG_PATH for pkg-config search + set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") + else() + erf_cray_verbose("CC wrapper not available or doesn't support --cray-print-opts") + endif() + + pkg_check_modules(CRAY_MPI QUIET mpich) + if(CRAY_MPI_FOUND) + erf_cray_verbose("pkg-config found mpich") + erf_cray_verbose(" CRAY_MPI_LIBRARIES: ${CRAY_MPI_LIBRARIES}") + erf_cray_verbose(" CRAY_MPI_LINK_LIBRARIES: ${CRAY_MPI_LINK_LIBRARIES}") + + # Extract the base MPI library name from link flags + # Try both LIBRARIES and LINK_LIBRARIES + foreach(lib IN LISTS CRAY_MPI_LIBRARIES CRAY_MPI_LINK_LIBRARIES) + if(lib MATCHES "^mpi_" AND NOT lib MATCHES "mpi_gtl") + set(MPI_BASE_LIB "${lib}") + erf_cray_verbose("Detected MPI base lib from pkg-config: 
${MPI_BASE_LIB}") + break() + endif() + endforeach() + else() + erf_cray_verbose("pkg-config did not find mpich") + endif() + endif() + + # Try 2: Search for library files (fallback) + if(NOT MPI_BASE_LIB) + erf_cray_verbose("Falling back to filesystem search for MPI library") + set(MPI_LIB_SEARCH_PATHS "") + if(DEFINED ENV{MPICH_DIR}) + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") + endif() + if(DEFINED ENV{CRAY_MPICH_DIR}) + list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") + endif() + + erf_cray_verbose("Searching for MPI libraries in: ${MPI_LIB_SEARCH_PATHS}") + + # Look for versioned libraries first (more specific) + foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) + file(GLOB mpi_libs "${path}/libmpi_*.so" "${path}/libmpi_*.a") + foreach(lib IN LISTS mpi_libs) + get_filename_component(libname "${lib}" NAME_WE) + string(REGEX REPLACE "^lib" "" libname "${libname}") + # Prefer mpi_gnu_*, mpi_cray over mpi_gtl_* + if(libname MATCHES "^mpi_(gnu|cray|intel)" AND NOT MPI_BASE_LIB) + set(MPI_BASE_LIB "${libname}") + erf_cray_verbose("Detected MPI base lib from filesystem: ${MPI_BASE_LIB} at ${lib}") + break() + endif() + endforeach() + if(MPI_BASE_LIB) + break() + endif() + endforeach() + endif() + + # Try 3: Compiler-based heuristic (last resort) + if(NOT MPI_BASE_LIB) + erf_cray_verbose("Falling back to compiler-based heuristic for MPI library") + if(DEFINED ENV{CRAY_MPICH_DIR}) + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(MPI_BASE_LIB "mpi_gnu_123") + message(WARNING "ERF: Could not auto-detect MPI library, using heuristic: ${MPI_BASE_LIB}") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + set(MPI_BASE_LIB "mpi_cray") + message(WARNING "ERF: Could not auto-detect MPI library, using heuristic: ${MPI_BASE_LIB}") + endif() + else() + set(MPI_BASE_LIB "mpi") + erf_cray_verbose("Non-Cray system, using default: ${MPI_BASE_LIB}") + endif() + endif() + + if(MPI_BASE_LIB) + message(STATUS " Using MPI base library: ${MPI_BASE_LIB}") + else() + 
message(WARNING "ERF: Could not determine MPI base library name!") + endif() # Determine GPU type and GTL library if(ERF_ENABLE_CUDA) From 8cabde04a4cd20ae3c3f474424c05356442502eb Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 10:28:10 -0500 Subject: [PATCH 29/44] HDF5 detection improvements with hip --- Build/cmake_with_kokkos_many.sh | 4 +- Build/cmake_with_kokkos_many_cuda.sh | 25 ++++ .../cmake_with_kokkos_many_noradiation_hip.sh | 25 ++++ Build/cmake_with_kokkos_many_sycl.sh | 25 ++++ CMake/CrayDetection.cmake | 110 ++++++++++++++++++ 5 files changed, 188 insertions(+), 1 deletion(-) create mode 100755 Build/cmake_with_kokkos_many_cuda.sh create mode 100755 Build/cmake_with_kokkos_many_noradiation_hip.sh create mode 100755 Build/cmake_with_kokkos_many_sycl.sh diff --git a/Build/cmake_with_kokkos_many.sh b/Build/cmake_with_kokkos_many.sh index 36c3d04704..43218b0dbe 100755 --- a/Build/cmake_with_kokkos_many.sh +++ b/Build/cmake_with_kokkos_many.sh @@ -12,7 +12,9 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ -DERF_ENABLE_RRTMGP:BOOL=ON \ -DERF_ENABLE_SHOC:BOOL=OFF \ -DERF_ENABLE_MPI:BOOL=ON \ - -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=OFF \ + -DERF_ENABLE_HIP:BOOL=OFF \ + -DERF_ENABLE_SYCL:BOOL=OFF \ -DERF_ENABLE_TESTS:BOOL=ON \ -DERF_ENABLE_FCOMPARE:BOOL=ON \ -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ diff --git a/Build/cmake_with_kokkos_many_cuda.sh b/Build/cmake_with_kokkos_many_cuda.sh new file mode 100755 index 0000000000..bc7cf23345 --- /dev/null +++ b/Build/cmake_with_kokkos_many_cuda.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +#Example cmake configuration script that assumes cray detection + +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_FFT:BOOL=ON \ + -DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + -DERF_ENABLE_RRTMGP:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=OFF \ + -DERF_ENABLE_MPI:BOOL=ON 
\ + -DERF_ENABLE_CUDA:BOOL=ON \ + -DERF_ENABLE_HIP:BOOL=OFF \ + -DERF_ENABLE_SYCL:BOOL=OFF \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -B build_erf .. + +cmake --build build_erf -j10 -v +cmake --install build_erf --prefix=install_erf diff --git a/Build/cmake_with_kokkos_many_noradiation_hip.sh b/Build/cmake_with_kokkos_many_noradiation_hip.sh new file mode 100755 index 0000000000..a2fd160a93 --- /dev/null +++ b/Build/cmake_with_kokkos_many_noradiation_hip.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +#Example cmake configuration script that assumes cray detection + +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_FFT:BOOL=ON \ + -DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + -DERF_ENABLE_RRTMGP:BOOL=OFF \ + -DERF_ENABLE_SHOC:BOOL=OFF \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=OFF \ + -DERF_ENABLE_HIP:BOOL=ON \ + -DERF_ENABLE_SYCL:BOOL=OFF \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -B build_erf .. 
+ +cmake --build build_erf -j10 -v +cmake --install build_erf --prefix=install_erf diff --git a/Build/cmake_with_kokkos_many_sycl.sh b/Build/cmake_with_kokkos_many_sycl.sh new file mode 100755 index 0000000000..b01c6dad09 --- /dev/null +++ b/Build/cmake_with_kokkos_many_sycl.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +#Example cmake configuration script that assumes cray detection + +cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ + -DMPIEXEC_PREFLAGS:STRING=--oversubscribe \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DERF_DIM:STRING=3 \ + -DERF_ENABLE_FFT:BOOL=ON \ + -DERF_ENABLE_NETCDF:BOOL=ON \ + -DERF_ENABLE_HDF5:BOOL=ON \ + -DERF_ENABLE_RRTMGP:BOOL=ON \ + -DERF_ENABLE_SHOC:BOOL=OFF \ + -DERF_ENABLE_MPI:BOOL=ON \ + -DERF_ENABLE_CUDA:BOOL=OFF \ + -DERF_ENABLE_HIP:BOOL=OFF \ + -DERF_ENABLE_SYCL:BOOL=ON \ + -DERF_ENABLE_TESTS:BOOL=ON \ + -DERF_ENABLE_FCOMPARE:BOOL=ON \ + -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -B build_erf .. + +cmake --build build_erf -j10 -v +cmake --install build_erf --prefix=install_erf diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 0328079110..94424405b4 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -525,6 +525,36 @@ else() erf_cray_verbose("NetCDF not enabled, skipping NetCDF checks") endif() +# ----------------------------------------------------------------------------- +# HDF5 Module Check (for AMReX HDF5 support) +# ----------------------------------------------------------------------------- + +if(AMReX_HDF5) + message(STATUS " Checking for HDF5...") + + set(HDF5_LOADED FALSE) + + if(DEFINED ENV{HDF5_DIR}) + message(STATUS " HDF5_DIR = $ENV{HDF5_DIR}") + set(HDF5_LOADED TRUE) + elseif(DEFINED ENV{HDF5_ROOT}) + message(STATUS " HDF5_ROOT = $ENV{HDF5_ROOT}") + set(HDF5_LOADED TRUE) + endif() + + if(NOT HDF5_LOADED) + message(WARNING "") + message(WARNING "ERF: HDF5 enabled but HDF5_DIR/HDF5_ROOT not set") + message(WARNING " To fix:") + 
message(WARNING " module load cray-hdf5-parallel") + message(WARNING "") + + erf_cray_verbose("HDF5_DIR/HDF5_ROOT not found in environment") + endif() +else() + erf_cray_verbose("HDF5 not enabled, skipping HDF5 checks") +endif() + # ----------------------------------------------------------------------------- # Module Environment Summary # ----------------------------------------------------------------------------- @@ -907,6 +937,72 @@ else() endif() endif() +# ============================================================================== +# Fix 7: HDF5 parallel detection for HIP builds (AMD GPUs) +# ============================================================================== +# PROBLEM: When building with HIP, FindHDF5 may find non-parallel HDF5 or +# detect different HDF5 versions for different languages (C vs HIP) +# SOLUTION: Use pkg-config to get HDF5 info and pre-configure HDF5 hints + +if(AMReX_GPU_BACKEND MATCHES "HIP" AND AMReX_HDF5) + message(STATUS "ERF: [Fix 7] Configuring HDF5 for HIP build") + + erf_cray_verbose("Problem: HIP compiler may find different HDF5 than C compiler") + erf_cray_verbose("Condition: AMReX_GPU_BACKEND=HIP and AMReX_HDF5=ON") + erf_cray_verbose("Solution: Use pkg-config to set HDF5 hints before AMReX configures") + + find_package(PkgConfig QUIET) + if(PkgConfig_FOUND) + # Get pkg-config path from Cray compiler wrapper + execute_process( + COMMAND CC --cray-print-opts=pkg_config_path + OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE CC_RESULT + ) + + if(CC_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) + set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") + erf_cray_verbose("Added Cray pkg-config path for HDF5 detection") + erf_cray_verbose(" PKG_CONFIG_PATH: ${CRAY_PKG_CONFIG_PATH}") + endif() + + # Query pkg-config for HDF5 + pkg_check_modules(PC_HDF5 QUIET hdf5) + if(PC_HDF5_FOUND) + message(STATUS " Found HDF5 via pkg-config") + erf_cray_verbose(" HDF5 
prefix: ${PC_HDF5_PREFIX}") + erf_cray_verbose(" HDF5 include dirs: ${PC_HDF5_INCLUDE_DIRS}") + erf_cray_verbose(" HDF5 library dirs: ${PC_HDF5_LIBRARY_DIRS}") + + # Set hints for CMake's FindHDF5 (used by AMReX) + set(HDF5_ROOT "${PC_HDF5_PREFIX}" CACHE PATH "HDF5 root from pkg-config") + set(HDF5_PREFER_PARALLEL ON CACHE BOOL "Prefer parallel HDF5") + set(HDF5_IS_PARALLEL TRUE CACHE BOOL "HDF5 is parallel") + + # Help FindHDF5 find the right paths + list(APPEND CMAKE_PREFIX_PATH "${PC_HDF5_PREFIX}") + + message(STATUS " Set HDF5_ROOT = ${PC_HDF5_PREFIX}") + message(STATUS " Set HDF5_PREFER_PARALLEL = ON") + message(STATUS " Set HDF5_IS_PARALLEL = TRUE") + else() + message(WARNING "ERF: pkg-config could not find HDF5") + erf_cray_verbose("pkg-config search for hdf5 failed") + endif() + else() + message(WARNING "ERF: PkgConfig not found, cannot auto-configure HDF5") + endif() + +else() + if(AMReX_HDF5 AND NOT AMReX_GPU_BACKEND MATCHES "HIP") + erf_cray_verbose("Fix 7 not needed (HDF5 enabled but not using HIP backend)") + else() + erf_cray_verbose("Fix 7 not needed (AMReX_HDF5 not enabled)") + endif() +endif() + # ============================================================================== # Fix 5-6: NetCDF with cray-netcdf-hdf5parallel (Checklist Items 5-6) # ============================================================================== @@ -1059,6 +1155,20 @@ if(FIX56_ACTIVE) endif() endif() +# Fix 7: HDF5 for HIP +set(FIX7_ACTIVE OFF) +if(AMReX_GPU_BACKEND MATCHES "HIP" AND AMReX_HDF5) + set(FIX7_ACTIVE ON) +endif() +message(STATUS "") +message(STATUS " Fix 7 (HDF5+HIP): ${FIX7_ACTIVE}") +if(FIX7_ACTIVE) + message(STATUS " Command line equivalent:") + message(STATUS " -DHDF5_ROOT=\$(pkg-config --variable=prefix hdf5)") + message(STATUS " -DHDF5_PREFER_PARALLEL=ON") + message(STATUS " -DHDF5_IS_PARALLEL=TRUE") +endif() + message(STATUS "") message(STATUS "══════════════════════════════════════════════════════════════") message(STATUS " To disable auto-fixes: 
-DERF_DISABLE_CRAY_AUTO_FIXES=ON") From 2a191eb675611aa6b74aa3e71cdd612ee9f208c6 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 10:34:01 -0500 Subject: [PATCH 30/44] Add modules --- Build/machines/frontier_erf.profile | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 Build/machines/frontier_erf.profile diff --git a/Build/machines/frontier_erf.profile b/Build/machines/frontier_erf.profile new file mode 100644 index 0000000000..01645623b8 --- /dev/null +++ b/Build/machines/frontier_erf.profile @@ -0,0 +1,10 @@ +module load cmake/3.30.5 +module load craype-accel-amd-gfx90a +module load rocm/6.2.4 +module load cray-mpich/8.1.31 +module load cce/18.0.1 + +module load cray-hdf5-parallel +module load cray-netcdf-hdf5parallel + +# export MPICH_GPU_SUPPORT_ENABLED=1 From 5651ae9a1f31ec4651a085260ce513655804a9f2 Mon Sep 17 00:00:00 2001 From: "Aaron M. Lattanzi" <103702284+AMLattanzi@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:54:20 -0800 Subject: [PATCH 31/44] Landmaks fix for metgrid. (#2706) --- Source/IO/ERF_ReadFromMetgrid.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/IO/ERF_ReadFromMetgrid.cpp b/Source/IO/ERF_ReadFromMetgrid.cpp index d5e1658a11..a78a5f1297 100644 --- a/Source/IO/ERF_ReadFromMetgrid.cpp +++ b/Source/IO/ERF_ReadFromMetgrid.cpp @@ -117,7 +117,7 @@ read_from_metgrid (int lev, const Box& domain, const std::string& fname, Vector success_i; success_i.resize(NC_iabs.size()); BuildFABsFromNetCDFFile(domain, fname, NC_inames, NC_idim_types, NC_iabs, success_i); for (int i = 0; i < success_i.size(); i++) { - if (NC_inames[i] == "LANDMASK" && success[i] == 1) {flag_lmask = 1;} + if (NC_inames[i] == "LANDMASK" && success_i[i] == 1) {flag_lmask = 1;} } // TODO: FIND OUT IF WE NEED TO DIVIDE VELS BY MAPFAC From 22ee634559a1a3a761b2bd58bd70161173d5b8ee Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Tue, 11 Nov 2025 16:27:26 -0500 Subject: [PATCH 32/44] Style --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b41f01293f..fb3cffa882 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,7 @@ if(ERF_ENABLE_MPI) if(DEFINED ENV{CRAYPE_VERSION} OR DEFINED ENV{CRAY_MPICH_DIR}) # On Cray system - check if using problematic bare MPI wrappers - if(CMAKE_CXX_COMPILER MATCHES "mpicxx" OR + if(CMAKE_CXX_COMPILER MATCHES "mpicxx" OR CMAKE_C_COMPILER MATCHES "mpicc" OR CMAKE_Fortran_COMPILER MATCHES "mpifort") message(STATUS "Detected bare MPI wrappers on Cray - skipping MPI detection (would hang)") From 86e3bceb02cae396f167e41f0d1210fa57af5a80 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 14:00:58 -0800 Subject: [PATCH 33/44] Add more logging --- CMake/FindNetCDF.cmake | 152 ++++++++++++++++++++++++++++++----------- 1 file changed, 112 insertions(+), 40 deletions(-) diff --git a/CMake/FindNetCDF.cmake b/CMake/FindNetCDF.cmake index 89faabdcaa..4168a5a956 100644 --- a/CMake/FindNetCDF.cmake +++ b/CMake/FindNetCDF.cmake @@ -15,20 +15,47 @@ # find_package (NetCDF REQUIRED) # target_link_libraries (target_name PUBLIC ${NETCDF_LINK_LIBRARIES}) -if (NETCDF_INCLUDES AND NETCDF_LIBRARIES) - # Already in cache, be silent - set (NETCDF_FIND_QUIETLY TRUE) -endif (NETCDF_INCLUDES AND NETCDF_LIBRARIES) +# Set FindNetCDF context +list(APPEND CMAKE_MESSAGE_CONTEXT "FindNetCDF") -# Build hints from user variables first +message(DEBUG "Starting NetCDF detection") + +# Detection log for failures +set(NETCDF_DETECTION_LOG "") + +# Check cache +if(NETCDF_INCLUDES AND NETCDF_LIBRARIES) + set(NETCDF_FIND_QUIETLY TRUE) + message(VERBOSE "NetCDF already in cache") + message(DEBUG " NETCDF_INCLUDES: ${NETCDF_INCLUDES}") + message(DEBUG " NETCDF_LIBRARIES: ${NETCDF_LIBRARIES}") +endif() + +# Build hints set(NETCDF_INCLUDE_HINTS) set(NETCDF_LIBRARY_HINTS) +message(DEBUG "Building 
search hints") + if(NETCDF_DIR) list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_DIR}/include) list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_DIR}/lib) + message(VERBOSE "Using NETCDF_DIR: ${NETCDF_DIR}") + list(APPEND NETCDF_DETECTION_LOG "NETCDF_DIR=${NETCDF_DIR}") +else() + message(DEBUG "NETCDF_DIR not set") + list(APPEND NETCDF_DETECTION_LOG "NETCDF_DIR not set") endif() +#if(DEFINED ENV{NETCDF_DIR}) +# list(APPEND NETCDF_INCLUDE_HINTS $ENV{NETCDF_DIR}/include) +# list(APPEND NETCDF_LIBRARY_HINTS $ENV{NETCDF_DIR}/lib) +# message(VERBOSE "Using ENV NETCDF_DIR: $ENV{NETCDF_DIR}") +# list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR=$ENV{NETCDF_DIR}") +#else() +# list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR not set") +#endif() + if(NETCDF_INCLUDE_DIR) list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIR}) endif() @@ -37,43 +64,60 @@ if(NETCDF_LIBRARY_DIR) list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIR}) endif() -# Use pkg-config to get hints +# Pkg-config +message(VERBOSE "Attempting pkg-config detection") set(ENV{PKG_CONFIG_PATH} "$ENV{MPICH_DIR}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") -message(STATUS "PKG_CONFIG_PATH = $ENV{PKG_CONFIG_PATH}") +message(DEBUG "PKG_CONFIG_PATH: $ENV{PKG_CONFIG_PATH}") find_package(PkgConfig QUIET) if(PKG_CONFIG_FOUND) - # Try multiple NetCDF variants in order of preference - pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf) - if(NOT NETCDF_FOUND) - pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf-mpi) - endif() - if(NOT NETCDF_FOUND) - pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf_parallel) - endif() - if(NOT NETCDF_FOUND) - pkg_check_modules(NETCDF QUIET IMPORTED_TARGET netcdf-cxx4_parallel) - endif() - - if(NETCDF_FOUND) - message(STATUS "Found NetCDF via pkg-config: ${NETCDF_MODULE_NAME}") - # Add pkg-config results to hints - list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIRS}) - list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIRS}) - endif() + message(DEBUG "pkg-config available") + + set(PKG_VARIANTS 
netcdf netcdf-mpi netcdf_parallel netcdf-cxx4_parallel) + foreach(variant ${PKG_VARIANTS}) + if(NOT NETCDF_FOUND) + message(DEBUG " Trying: ${variant}") + pkg_check_modules(NETCDF QUIET IMPORTED_TARGET ${variant}) + + if(NETCDF_FOUND) + message(VERBOSE "Found via pkg-config: ${variant}") + message(DEBUG " Version: ${NETCDF_VERSION}") + list(APPEND NETCDF_DETECTION_LOG "pkg-config ${variant}: found") + list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIRS}) + list(APPEND NETCDF_LIBRARY_HINTS ${NETCDF_LIBRARY_DIRS}) + break() + else() + list(APPEND NETCDF_DETECTION_LOG "pkg-config ${variant}: not found") + endif() + endif() + endforeach() +else() + message(DEBUG "pkg-config not available") + list(APPEND NETCDF_DETECTION_LOG "pkg-config: not available") endif() -# Try CMake's find_library using hints +# Manual search +message(VERBOSE "Searching for netcdf.h and libnetcdf") +message(DEBUG " Include hints: ${NETCDF_INCLUDE_HINTS}") +message(DEBUG " Library hints: ${NETCDF_LIBRARY_HINTS}") + find_path(NETCDF_INCLUDES netcdf.h HINTS ${NETCDF_INCLUDE_HINTS} $ENV{NETCDF_DIR}/include) +if(NETCDF_INCLUDES) + message(VERBOSE "Found netcdf.h: ${NETCDF_INCLUDES}") + list(APPEND NETCDF_DETECTION_LOG "find_path: ${NETCDF_INCLUDES}") +else() + message(DEBUG "netcdf.h not found") + list(APPEND NETCDF_DETECTION_LOG "find_path: failed") +endif() + find_library(NETCDF_LIBRARIES_C NAMES netcdf HINTS ${NETCDF_LIBRARY_HINTS} $ENV{NETCDF_DIR}/lib) mark_as_advanced(NETCDF_LIBRARIES_C) -# If find_library succeeded, check if we need HDF5 if(NETCDF_LIBRARIES_C) # Only add HDF5 if pkg-config told us NetCDF needs it if(NETCDF_LINK_LIBRARIES) @@ -92,18 +136,36 @@ if(NETCDF_LIBRARIES_C) message(STATUS " NETCDF_LIBRARIES_C = ${NETCDF_LINK_LIBRARIES}") endif() else() - message(STATUS "NetCDF was built without HDF5 support") + message(STATUS "NetCDF has no link libraries (potentially was built without HDF5 support)") endif() - else() - message(STATUS "No pkg-config information available; assuming 
NetCDF doesn't need HDF5") - endif() + message(VERBOSE "Found libnetcdf: ${NETCDF_LIBRARIES_C}") + list(APPEND NETCDF_DETECTION_LOG "find_library: ${NETCDF_LIBRARIES_C}") # FALLBACK: If find_library failed but pkg-config succeeded, use pkg-config's library list elseif(NETCDF_FOUND AND NETCDF_LINK_LIBRARIES) set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) message(STATUS "Using NetCDF libraries from pkg-config: ${NETCDF_LINK_LIBRARIES}") +else() + message(DEBUG "libnetcdf not found") + list(APPEND NETCDF_DETECTION_LOG "find_library: failed") +endif() + +# HDF5 dependency +message(DEBUG "Checking HDF5 dependency") +if(NETCDF_LIBRARIES_C AND NETCDF_LINK_LIBRARIES) + string(FIND "${NETCDF_LINK_LIBRARIES}" "hdf5" HDF5_IN_NETCDF) + if(HDF5_IN_NETCDF GREATER -1) + message(VERBOSE "NetCDF requires HDF5") + if(TARGET hdf5::hdf5 OR HDF5_FOUND) + list(APPEND NETCDF_LIBRARIES_C ${HDF5_LIBRARIES}) + message(DEBUG "Using HDF5: ${HDF5_LIBRARIES}") + else() + set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) + message(DEBUG "Using pkg-config libraries with HDF5") + endif() + endif() endif() -set(NetCDF_has_interfaces "YES") # will be set to NO if we're missing any interfaces +set(NetCDF_has_interfaces "YES") set(NetCDF_libs "${NETCDF_LIBRARIES_C}") get_filename_component(NetCDF_lib_dirs "${NETCDF_LIBRARIES_C}" PATH) @@ -135,14 +197,24 @@ set(NETCDF_LIBRARIES "${NetCDF_libs}" CACHE STRING "All NetCDF libraries require set(NETCDF_LINK_LIBRARIES ${NetCDF_libs}) set(NETCDF_INCLUDE_DIRS ${NETCDF_INCLUDES}) -# handle the QUIETLY and REQUIRED arguments and set NETCDF_FOUND to TRUE if -# all listed variables are TRUE -include (FindPackageHandleStandardArgs) +# Standard find package handling +include(FindPackageHandleStandardArgs) find_package_handle_standard_args (NetCDF DEFAULT_MSG NETCDF_LIBRARIES NETCDF_LINK_LIBRARIES NETCDF_INCLUDE_DIRS NETCDF_INCLUDES NetCDF_has_interfaces) -message(STATUS " NETCDF_LIBRARIES = ${NETCDF_LIBRARIES}") -#message(STATUS " NETCDF_LINK_LIBRARIES = 
${NETCDF_LINK_LIBRARIES}") -#message(STATUS " NETCDF_INCLUDE_DIRS = ${NETCDF_INCLUDE_DIRS}") -message(STATUS " NETCDF_INCLUDES = ${NETCDF_INCLUDES}") -mark_as_advanced (NETCDF_LIBRARIES NETCDF_INCLUDES) +# Show diagnostics on failure +if(NOT NETCDF_FOUND) + message(STATUS "Detection attempts:") + foreach(attempt ${NETCDF_DETECTION_LOG}) + message(STATUS " ${attempt}") + endforeach() + message(STATUS "") + message(STATUS "To resolve:") + message(STATUS " Cray: module load cray-netcdf-hdf5parallel") + message(STATUS " Manual: -DNETCDF_DIR=/path/to/netcdf") + message(STATUS " Env var: export NETCDF_DIR=/path/to/netcdf") +endif() + +mark_as_advanced(NETCDF_LIBRARIES NETCDF_INCLUDES) +# Pop FindNetCDF context +list(POP_BACK CMAKE_MESSAGE_CONTEXT) From 0c2144d163204cf6f5cb75cb6ad1d9cea9c385a3 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 14:08:52 -0800 Subject: [PATCH 34/44] Update log levels --- CMake/CrayDetection.cmake | 1107 +++++++++++++------------------------ 1 file changed, 392 insertions(+), 715 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 94424405b4..8299749e8f 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -2,62 +2,67 @@ # Cray System Auto-Detection and Workarounds # ============================================================================== # This module detects Cray systems and automatically applies workarounds for -# common build issues. Each fix corresponds to a checklist item. +# common build issues. Each fix corresponds to a documented checklist item. +# +# CMake 3.25+ version using standard message log levels: +# cmake .. # Quiet (STATUS messages only) +# cmake --log-level=VERBOSE .. # Show detection details +# cmake --log-level=DEBUG .. # Show all diagnostics +# cmake --log-context .. 
# Show message hierarchy # # Options: -# -DERF_DISABLE_CRAY_AUTO_FIXES=ON : Disable automatic Cray system fixes -# -DERF_VERBOSE_CRAY_FIXES=ON : Show detailed info for each fix +# -DERF_DISABLE_CRAY_AUTO_FIXES=ON : Disable automatic Cray system fixes # ============================================================================== option(ERF_DISABLE_CRAY_AUTO_FIXES "Disable automatic Cray system fixes" OFF) -option(ERF_VERBOSE_CRAY_FIXES "Show verbose output for Cray fixes" OFF) -# Helper macro for verbose messages -macro(erf_cray_verbose) - if(ERF_VERBOSE_CRAY_FIXES) - message(STATUS " [VERBOSE] ${ARGN}") - endif() -endmacro() +# Set Cray context for hierarchical logging +list(APPEND CMAKE_MESSAGE_CONTEXT "Cray") if(ERF_DISABLE_CRAY_AUTO_FIXES) - message(STATUS "ERF: Cray auto-fixes disabled by user") + message(STATUS "Auto-fixes disabled by user") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) return() endif() +message(DEBUG "Starting Cray detection and workaround application") + # ============================================================================== # Detect Cray Environment # ============================================================================== set(ERF_ON_CRAY FALSE) -erf_cray_verbose("Checking for Cray environment...") +message(DEBUG "Checking for Cray environment") +message(TRACE " CMAKE_C_COMPILER: ${CMAKE_C_COMPILER}") +message(TRACE " CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") +message(TRACE " CRAY_MPICH_DIR: $ENV{CRAY_MPICH_DIR}") # Check for Cray compiler wrappers if(CMAKE_C_COMPILER MATCHES ".*cc$" AND CMAKE_CXX_COMPILER MATCHES ".*CC$" AND DEFINED ENV{CRAY_MPICH_DIR}) set(ERF_ON_CRAY TRUE) - message(STATUS "ERF: Detected Cray system") - message(STATUS " CMAKE_C_COMPILER = ${CMAKE_C_COMPILER}") - message(STATUS " CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") - message(STATUS " CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") - erf_cray_verbose("Detection method: Cray compiler wrappers (cc, CC) + CRAY_MPICH_DIR") + message(STATUS "Detected Cray system 
via compiler wrappers") + message(VERBOSE " C compiler: ${CMAKE_C_COMPILER}") + message(VERBOSE " C++ compiler: ${CMAKE_CXX_COMPILER}") + message(VERBOSE " CRAY_MPICH_DIR: $ENV{CRAY_MPICH_DIR}") endif() # Additional check for Cray environment variables if(DEFINED ENV{CRAYPE_VERSION}) set(ERF_ON_CRAY TRUE) - message(STATUS "ERF: Detected Cray Programming Environment") - message(STATUS " CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") - erf_cray_verbose("Detection method: CRAYPE_VERSION environment variable") + message(STATUS "Detected Cray Programming Environment") + message(VERBOSE " CRAYPE_VERSION: $ENV{CRAYPE_VERSION}") endif() if(NOT ERF_ON_CRAY) - message(STATUS "ERF: Not on a Cray system, skipping Cray-specific fixes") - erf_cray_verbose("CMAKE_C_COMPILER = ${CMAKE_C_COMPILER}") - erf_cray_verbose("CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") - erf_cray_verbose("CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") - erf_cray_verbose("CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") + message(STATUS "Not on a Cray system, skipping Cray-specific fixes") + message(DEBUG "Detection criteria not met:") + message(DEBUG " Compiler wrappers cc/CC: NO") + message(DEBUG " CRAY_MPICH_DIR set: NO") + message(DEBUG " CRAYPE_VERSION set: NO") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) return() endif() @@ -65,16 +70,11 @@ endif() # Compiler Version Checks # ============================================================================== -message(STATUS "ERF: Checking compiler versions...") +message(VERBOSE "Checking compiler versions") -# ----------------------------------------------------------------------------- # GCC Version Check (for std::filesystem support) -# ----------------------------------------------------------------------------- -# ERF uses C++17 which requires GCC 8.0+ -# Older GCC versions will fail with "fatal error: filesystem: No such file" - if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - message(STATUS " Detected GNU C++ compiler version: ${CMAKE_CXX_COMPILER_VERSION}") + message(VERBOSE 
"Detected GNU compiler: ${CMAKE_CXX_COMPILER_VERSION}") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.0") message(FATAL_ERROR @@ -84,300 +84,204 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") "Found: GCC ${CMAKE_CXX_COMPILER_VERSION}\n" "════════════════════════════════════════════════════════════════\n" "\n" - "On Cray systems, fix by using the Cray wrapper with a modern compiler:\n" - " 1. Load a newer compiler module:\n" - " module load PrgEnv-gnu\n" - " module load gcc\n" - "\n" - " 2. Set compiler explicitly:\n" - " -DCMAKE_CXX_COMPILER=\$(which CC)\n" - " Or set environment variable:\n" - " export CXX=\$(which CC)\n" - "\n" - " 3. Verify compiler version:\n" - " CC --version\n" + "On Cray systems:\n" + " 1. Load newer compiler: module load PrgEnv-gnu gcc\n" + " 2. Verify version: CC --version\n" "") - else() - message(STATUS " GCC version ${CMAKE_CXX_COMPILER_VERSION} >= 8.0") - erf_cray_verbose("GCC version sufficient for C++17 ") endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") - message(STATUS " Detected Cray C++ compiler version: ${CMAKE_CXX_COMPILER_VERSION}") - erf_cray_verbose("Cray compiler wrappers detected") - # Cray wrappers forward to underlying compiler - check what's loaded + message(DEBUG "GCC ${CMAKE_CXX_COMPILER_VERSION} >= 8.0 (C++17 filesystem supported)") + +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + message(VERBOSE "Detected Cray compiler: ${CMAKE_CXX_COMPILER_VERSION}") if(DEFINED ENV{PE_ENV}) - message(STATUS " Programming Environment: $ENV{PE_ENV}") - erf_cray_verbose("PE_ENV = $ENV{PE_ENV}") + message(DEBUG "Programming Environment: $ENV{PE_ENV}") endif() else() - message(STATUS " Detected C++ compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") + message(VERBOSE "Detected ${CMAKE_CXX_COMPILER_ID} compiler: ${CMAKE_CXX_COMPILER_VERSION}") endif() -# ----------------------------------------------------------------------------- -# GPU Compiler Checks (for CUDA builds) -# 
----------------------------------------------------------------------------- -# Kokkos and EKAT read CMAKE_CUDA_COMPILER and CMAKE_CUDA_FLAGS -# We need to ensure these are set correctly for Cray systems - +# GPU Compiler Checks if(ERF_ENABLE_CUDA) - message(STATUS "") - message(STATUS "ERF: Checking GPU compiler configuration...") + message(VERBOSE "Checking CUDA compiler configuration") - # Check if CMAKE_CUDA_COMPILER is set if(CMAKE_CUDA_COMPILER) - message(STATUS " CMAKE_CUDA_COMPILER = ${CMAKE_CUDA_COMPILER}") - erf_cray_verbose("CUDA compiler explicitly set by user or CMake") + message(DEBUG "CMAKE_CUDA_COMPILER: ${CMAKE_CUDA_COMPILER}") else() - message(STATUS " CMAKE_CUDA_COMPILER not set (CMake will auto-detect)") - erf_cray_verbose("CMake will search for nvcc in PATH") + message(DEBUG "CMAKE_CUDA_COMPILER not set (will auto-detect)") endif() - # Check if CMAKE_CUDA_FLAGS has been set if(CMAKE_CUDA_FLAGS) - message(STATUS " CMAKE_CUDA_FLAGS = ${CMAKE_CUDA_FLAGS}") - erf_cray_verbose("CUDA flags explicitly set by user") - else() - message(STATUS " CMAKE_CUDA_FLAGS not set (will be auto-configured)") - erf_cray_verbose("Cray-specific CUDA flags will be added by Fix 1 if needed") + message(DEBUG "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") endif() - # ------------------------------------------------------------------------- # Detect AMReX CUDA architecture - # Priority: CMake var > AMREX_CUDA_ARCH env > CMAKE_CUDA_ARCH env > CRAY_ACCEL_TARGET - # ------------------------------------------------------------------------- + message(DEBUG "Detecting CUDA architecture") if(AMReX_CUDA_ARCH) - message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH} (user specified)") - erf_cray_verbose("AMReX CUDA arch set via CMake variable") + message(VERBOSE "AMReX_CUDA_ARCH: ${AMReX_CUDA_ARCH} (user specified)") elseif(DEFINED ENV{AMREX_CUDA_ARCH}) set(AMReX_CUDA_ARCH "$ENV{AMREX_CUDA_ARCH}" CACHE STRING "CUDA arch from AMREX_CUDA_ARCH") - message(STATUS " AMReX_CUDA_ARCH = 
$ENV{AMREX_CUDA_ARCH} (from AMREX_CUDA_ARCH)") - erf_cray_verbose("AMReX CUDA arch from AMREX_CUDA_ARCH environment variable") + message(VERBOSE "AMReX_CUDA_ARCH: $ENV{AMREX_CUDA_ARCH} (from environment)") elseif(DEFINED ENV{CMAKE_CUDA_ARCH}) - # Common in build scripts: CMAKE_CUDA_ARCH="80" set(ENV_CUDA_ARCH "$ENV{CMAKE_CUDA_ARCH}") + message(DEBUG "Found CMAKE_CUDA_ARCH: ${ENV_CUDA_ARCH}") - # Convert to AMReX format (add decimal point if needed) + # Convert to AMReX format if(ENV_CUDA_ARCH MATCHES "^[0-9][0-9]$") - # Two-digit format: 70, 80, 90 -> 7.0, 8.0, 9.0 string(SUBSTRING "${ENV_CUDA_ARCH}" 0 1 MAJOR) string(SUBSTRING "${ENV_CUDA_ARCH}" 1 1 MINOR) set(DETECTED_CUDA_ARCH "${MAJOR}.${MINOR}") + message(TRACE "Converted ${ENV_CUDA_ARCH} -> ${DETECTED_CUDA_ARCH}") else() - # Already in decimal format or other format set(DETECTED_CUDA_ARCH "${ENV_CUDA_ARCH}") endif() set(AMReX_CUDA_ARCH "${DETECTED_CUDA_ARCH}" CACHE STRING "CUDA arch from CMAKE_CUDA_ARCH") - message(STATUS " AMReX_CUDA_ARCH = ${DETECTED_CUDA_ARCH} (from CMAKE_CUDA_ARCH=${ENV_CUDA_ARCH})") - erf_cray_verbose("Converted CMAKE_CUDA_ARCH=${ENV_CUDA_ARCH} -> AMReX_CUDA_ARCH=${DETECTED_CUDA_ARCH}") + message(VERBOSE "AMReX_CUDA_ARCH: ${DETECTED_CUDA_ARCH} (from CMAKE_CUDA_ARCH)") elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) - # Auto-detect from Cray accelerator module (set by 'module load gpu') set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") - message(STATUS " Detected CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + message(VERBOSE "CRAY_ACCEL_TARGET: ${CRAY_ACCEL_TARGET}") if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") set(AMReX_CUDA_ARCH "7.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_CUDA_ARCH = 7.0 (Tesla V100 from CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_CUDA_ARCH: 7.0 (Tesla V100)") elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") set(AMReX_CUDA_ARCH "8.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_CUDA_ARCH = 8.0 (A100 from 
CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_CUDA_ARCH: 8.0 (A100)") elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") set(AMReX_CUDA_ARCH "9.0" CACHE STRING "CUDA arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_CUDA_ARCH = 9.0 (H100 from CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_CUDA_ARCH: 9.0 (H100)") else() - message(WARNING "ERF: Unknown CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + message(WARNING "Unknown CRAY_ACCEL_TARGET: ${CRAY_ACCEL_TARGET}") endif() - erf_cray_verbose("AMReX CUDA arch from CRAY_ACCEL_TARGET module variable") else() - message(WARNING "") - message(WARNING "ERF: AMReX_CUDA_ARCH not detected") - message(WARNING " For Perlmutter: module load gpu") - message(WARNING " Or set: export CMAKE_CUDA_ARCH=80") - message(WARNING " Or set: -DAMReX_CUDA_ARCH=8.0") - message(WARNING "") + message(WARNING "AMReX_CUDA_ARCH not detected") + message(STATUS " For Perlmutter: module load gpu") + message(STATUS " Or: export CMAKE_CUDA_ARCH=80") + message(STATUS " Or: -DAMReX_CUDA_ARCH=8.0") endif() - endif() -# ----------------------------------------------------------------------------- -# Detect AMReX AMD architecture (for HIP builds) -# Priority: CMake var > AMREX_AMD_ARCH env > CMAKE_AMD_ARCH env > CRAY_ACCEL_TARGET -# ----------------------------------------------------------------------------- - +# AMD GPU Architecture Detection (HIP) if(AMReX_GPU_BACKEND MATCHES "HIP" OR ERF_ENABLE_HIP) - message(STATUS "") - message(STATUS "ERF: Checking HIP/ROCm compiler configuration...") + message(VERBOSE "Checking HIP/ROCm configuration") if(AMReX_AMD_ARCH) - message(STATUS " AMReX_AMD_ARCH = ${AMReX_AMD_ARCH} (user specified)") - erf_cray_verbose("AMReX AMD arch set via CMake variable") - + message(VERBOSE "AMReX_AMD_ARCH: ${AMReX_AMD_ARCH} (user specified)") elseif(DEFINED ENV{AMREX_AMD_ARCH}) set(AMReX_AMD_ARCH "$ENV{AMREX_AMD_ARCH}" CACHE STRING "AMD arch from AMREX_AMD_ARCH") - message(STATUS " AMReX_AMD_ARCH = $ENV{AMREX_AMD_ARCH} (from 
AMREX_AMD_ARCH)") - erf_cray_verbose("AMReX AMD arch from AMREX_AMD_ARCH environment variable") - + message(VERBOSE "AMReX_AMD_ARCH: $ENV{AMREX_AMD_ARCH} (from environment)") elseif(DEFINED ENV{CMAKE_AMD_ARCH}) set(AMReX_AMD_ARCH "$ENV{CMAKE_AMD_ARCH}" CACHE STRING "AMD arch from CMAKE_AMD_ARCH") - message(STATUS " AMReX_AMD_ARCH = $ENV{CMAKE_AMD_ARCH} (from CMAKE_AMD_ARCH)") - erf_cray_verbose("AMReX AMD arch from CMAKE_AMD_ARCH environment variable") - + message(VERBOSE "AMReX_AMD_ARCH: $ENV{CMAKE_AMD_ARCH} (from CMAKE_AMD_ARCH)") elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) - # Auto-detect from Cray accelerator module set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") - message(STATUS " Detected CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + message(VERBOSE "CRAY_ACCEL_TARGET: ${CRAY_ACCEL_TARGET}") if(CRAY_ACCEL_TARGET STREQUAL "amd_gfx90a") set(AMReX_AMD_ARCH "gfx90a" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_AMD_ARCH = gfx90a (MI200 from CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_AMD_ARCH: gfx90a (MI200)") elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx908") set(AMReX_AMD_ARCH "gfx908" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_AMD_ARCH = gfx908 (MI100 from CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_AMD_ARCH: gfx908 (MI100)") elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx942") set(AMReX_AMD_ARCH "gfx942" CACHE STRING "AMD arch from CRAY_ACCEL_TARGET") - message(STATUS " AMReX_AMD_ARCH = gfx942 (MI300 from CRAY_ACCEL_TARGET)") + message(VERBOSE "AMReX_AMD_ARCH: gfx942 (MI300)") else() - message(WARNING "ERF: Unknown CRAY_ACCEL_TARGET = ${CRAY_ACCEL_TARGET}") + message(WARNING "Unknown CRAY_ACCEL_TARGET: ${CRAY_ACCEL_TARGET}") endif() - erf_cray_verbose("AMReX AMD arch from CRAY_ACCEL_TARGET module variable") else() - message(WARNING "") - message(WARNING "ERF: AMReX_AMD_ARCH not detected") - message(WARNING " For Frontier: module load craype-accel-amd-gfx90a") - message(WARNING " Or set: export 
CMAKE_AMD_ARCH=gfx90a") - message(WARNING " Or set: -DAMReX_AMD_ARCH=gfx90a") - message(WARNING "") + message(WARNING "AMReX_AMD_ARCH not detected") + message(STATUS " For Frontier: module load craype-accel-amd-gfx90a") + message(STATUS " Or: export CMAKE_AMD_ARCH=gfx90a") endif() - endif() -# ------------------------------------------------------------------------- -# Detect Kokkos architecture (for EKAT builds) -# Priority: CMake var > KOKKOS_GPU_ARCH env > CRAY_ACCEL_TARGET -# ------------------------------------------------------------------------- +# Kokkos Architecture Detection (for EKAT physics) if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) - message(STATUS "") - message(STATUS " EKAT-based physics enabled, checking Kokkos architecture...") + message(VERBOSE "EKAT-based physics enabled, checking Kokkos architecture") - # Check if user already set Kokkos_ARCH_* via CMake set(KOKKOS_ARCH_SET FALSE) - # Check for CUDA architectures - if(Kokkos_ARCH_VOLTA70 OR Kokkos_ARCH_AMPERE80 OR Kokkos_ARCH_HOPPER90) - set(KOKKOS_ARCH_SET TRUE) - message(STATUS " Kokkos CUDA arch already set by user") - erf_cray_verbose("User specified Kokkos CUDA architecture via CMake variable") - - # Check for AMD architectures - elseif(Kokkos_ARCH_VEGA90A OR Kokkos_ARCH_VEGA908 OR Kokkos_ARCH_MI300A) + # Check if user already set via CMake + if(Kokkos_ARCH_VOLTA70 OR Kokkos_ARCH_AMPERE80 OR Kokkos_ARCH_HOPPER90 OR + Kokkos_ARCH_VEGA90A OR Kokkos_ARCH_VEGA908 OR Kokkos_ARCH_MI300A) set(KOKKOS_ARCH_SET TRUE) - message(STATUS " Kokkos AMD arch already set by user") - erf_cray_verbose("User specified Kokkos AMD architecture via CMake variable") + message(VERBOSE "Kokkos architecture already set by user") elseif(DEFINED ENV{KOKKOS_GPU_ARCH}) - # Detect from KOKKOS_GPU_ARCH environment variable (build scripts) set(KOKKOS_GPU_ARCH_ENV "$ENV{KOKKOS_GPU_ARCH}") - message(STATUS " Detected KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") + message(VERBOSE "KOKKOS_GPU_ARCH: 
${KOKKOS_GPU_ARCH_ENV}") - # Map NVIDIA architectures + # Map to Kokkos arch variables if(KOKKOS_GPU_ARCH_ENV STREQUAL "VOLTA70") set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VOLTA70 -> Kokkos_ARCH_VOLTA70=ON") - + message(DEBUG "Mapped VOLTA70 -> Kokkos_ARCH_VOLTA70") elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "AMPERE80") set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=AMPERE80 -> Kokkos_ARCH_AMPERE80=ON") - + message(DEBUG "Mapped AMPERE80 -> Kokkos_ARCH_AMPERE80") elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "HOPPER90") set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=HOPPER90 -> Kokkos_ARCH_HOPPER90=ON") - - # Map AMD architectures + message(DEBUG "Mapped HOPPER90 -> Kokkos_ARCH_HOPPER90") elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "VEGA90A") set(Kokkos_ARCH_VEGA90A ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_VEGA90A = ON") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VEGA90A -> Kokkos_ARCH_VEGA90A=ON") - + message(DEBUG "Mapped VEGA90A -> Kokkos_ARCH_VEGA90A") elseif(KOKKOS_GPU_ARCH_ENV STREQUAL "VEGA908") set(Kokkos_ARCH_VEGA908 ON CACHE BOOL "Kokkos arch from KOKKOS_GPU_ARCH") - message(STATUS " Set Kokkos_ARCH_VEGA908 = ON") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped KOKKOS_GPU_ARCH=VEGA908 -> Kokkos_ARCH_VEGA908=ON") - + message(DEBUG "Mapped VEGA908 -> Kokkos_ARCH_VEGA908") else() - message(WARNING "ERF: Unknown KOKKOS_GPU_ARCH = ${KOKKOS_GPU_ARCH_ENV}") - message(WARNING " Expected: VOLTA70, AMPERE80, HOPPER90, VEGA90A, or VEGA908") + message(WARNING "Unknown 
KOKKOS_GPU_ARCH: ${KOKKOS_GPU_ARCH_ENV}") endif() elseif(DEFINED ENV{CRAY_ACCEL_TARGET}) - # Fall back to CRAY_ACCEL_TARGET (set by 'module load gpu' or 'module load craype-accel-*') set(CRAY_ACCEL_TARGET "$ENV{CRAY_ACCEL_TARGET}") + message(DEBUG "Using CRAY_ACCEL_TARGET for Kokkos: ${CRAY_ACCEL_TARGET}") - # NVIDIA targets + # Map NVIDIA targets if(CRAY_ACCEL_TARGET STREQUAL "nvidia70") set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_VOLTA70 = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia70 -> Kokkos_ARCH_VOLTA70=ON") - + message(VERBOSE "Set Kokkos_ARCH_VOLTA70 from CRAY_ACCEL_TARGET") elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia80") set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_AMPERE80 = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia80 -> Kokkos_ARCH_AMPERE80=ON") - + message(VERBOSE "Set Kokkos_ARCH_AMPERE80 from CRAY_ACCEL_TARGET") elseif(CRAY_ACCEL_TARGET STREQUAL "nvidia90") set(Kokkos_ARCH_HOPPER90 ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_HOPPER90 = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=nvidia90 -> Kokkos_ARCH_HOPPER90=ON") - - # AMD targets + message(VERBOSE "Set Kokkos_ARCH_HOPPER90 from CRAY_ACCEL_TARGET") + # Map AMD targets elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx90a") set(Kokkos_ARCH_VEGA90A ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_VEGA90A = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx90a -> Kokkos_ARCH_VEGA90A=ON") - + message(VERBOSE "Set Kokkos_ARCH_VEGA90A from CRAY_ACCEL_TARGET") elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx908") set(Kokkos_ARCH_VEGA908 ON CACHE BOOL "Kokkos arch from 
CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_VEGA908 = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx908 -> Kokkos_ARCH_VEGA908=ON") - + message(VERBOSE "Set Kokkos_ARCH_VEGA908 from CRAY_ACCEL_TARGET") elseif(CRAY_ACCEL_TARGET STREQUAL "amd_gfx942") set(Kokkos_ARCH_MI300A ON CACHE BOOL "Kokkos arch from CRAY_ACCEL_TARGET") - message(STATUS " Set Kokkos_ARCH_MI300A = ON (from CRAY_ACCEL_TARGET)") set(KOKKOS_ARCH_SET TRUE) - erf_cray_verbose("Mapped CRAY_ACCEL_TARGET=amd_gfx942 -> Kokkos_ARCH_MI300A=ON") + message(VERBOSE "Set Kokkos_ARCH_MI300A from CRAY_ACCEL_TARGET") endif() endif() if(NOT KOKKOS_ARCH_SET) - message(WARNING "") - message(WARNING "ERF: Kokkos architecture not detected") - message(WARNING " For Perlmutter: module load gpu") - message(WARNING " For Frontier: module load craype-accel-amd-gfx90a") - message(WARNING " Or set: export KOKKOS_GPU_ARCH=AMPERE80 (or VEGA90A)") - message(WARNING " Or set: -DKokkos_ARCH_AMPERE80=ON (or -DKokkos_ARCH_VEGA90A=ON)") - message(WARNING "") + message(WARNING "Kokkos architecture not detected") + message(STATUS " For Perlmutter: module load gpu") + message(STATUS " For Frontier: module load craype-accel-amd-gfx90a") + message(STATUS " Or: export KOKKOS_GPU_ARCH=AMPERE80") else() - message(STATUS "") - message(STATUS " Note: After Kokkos configures, CMAKE_CUDA_ARCHITECTURES") - message(STATUS " will be set from Kokkos_CUDA_ARCHITECTURES") - erf_cray_verbose("Kokkos will set CMAKE_CUDA_ARCHITECTURES when CUDA language is enabled") + message(DEBUG "Note: Kokkos will set CMAKE_CUDA_ARCHITECTURES when CUDA language is enabled") endif() endif() @@ -385,212 +289,136 @@ endif() # Prerequisite Checks # ============================================================================== -message(STATUS "ERF: Checking Cray prerequisites...") +message(VERBOSE "Checking prerequisites") -# 
----------------------------------------------------------------------------- # CMake Version Check -# ----------------------------------------------------------------------------- -# Cray systems work best with CMake 3.24.0+ -# Earlier versions may have issues with Cray wrappers and CUDA when NVHPC is splayed - set(ERF_RECOMMENDED_CMAKE_VERSION "3.24.0") - if(CMAKE_VERSION VERSION_LESS ${ERF_RECOMMENDED_CMAKE_VERSION}) - message(WARNING - "\n" - "ERF: CMake version ${CMAKE_VERSION} detected\n" - " Recommended minimum for Cray systems: ${ERF_RECOMMENDED_CMAKE_VERSION}\n" - " You may experience issues with Cray compiler wrappers and CUDA\n" - "\n" - " To fix:\n" - " module load cmake\n" - "") - - erf_cray_verbose("Current CMake: ${CMAKE_VERSION}") - erf_cray_verbose("Recommended: ${ERF_RECOMMENDED_CMAKE_VERSION}+") - erf_cray_verbose("Known issues with older CMake on Cray:") - erf_cray_verbose(" - CUDA language detection failures") - erf_cray_verbose(" - Incorrect compiler wrapper handling") - erf_cray_verbose(" - Missing Cray-specific find modules") + message(WARNING "CMake ${CMAKE_VERSION} < recommended ${ERF_RECOMMENDED_CMAKE_VERSION}") + message(STATUS " Fix: module load cmake") + message(DEBUG "Older CMake may have issues with Cray wrappers and CUDA") else() - message(STATUS " CMake version ${CMAKE_VERSION} >= ${ERF_RECOMMENDED_CMAKE_VERSION}") - erf_cray_verbose("CMake version check passed") + message(DEBUG "CMake ${CMAKE_VERSION} >= ${ERF_RECOMMENDED_CMAKE_VERSION}") endif() -# ----------------------------------------------------------------------------- # CUDA Toolkit Check -# ----------------------------------------------------------------------------- -# When building with CUDA, the cudatoolkit module should be loaded -# This sets CUDA_HOME and other necessary environment variables - if(ERF_ENABLE_CUDA) - message(STATUS " Checking for CUDA toolkit...") + message(VERBOSE "Checking CUDA toolkit") set(CUDA_TOOLKIT_LOADED FALSE) - # Check for CUDA_HOME (set by 
cudatoolkit module) if(DEFINED ENV{CUDA_HOME}) - message(STATUS " CUDA_HOME = $ENV{CUDA_HOME}") set(CUDA_TOOLKIT_LOADED TRUE) - erf_cray_verbose("CUDA toolkit appears to be loaded (CUDA_HOME set)") + message(VERBOSE " CUDA_HOME: $ENV{CUDA_HOME}") endif() - # Additional check for CUDATOOLKIT_HOME (alternative Cray variable) - if(DEFINED ENV{CUDATOOLKIT_HOME}) - message(STATUS " CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") + if(NOT CUDA_TOOLKIT_LOADED AND DEFINED ENV{CUDATOOLKIT_HOME}) set(CUDA_TOOLKIT_LOADED TRUE) - erf_cray_verbose("CUDA toolkit appears to be loaded (CUDATOOLKIT_HOME set)") + message(VERBOSE " CUDATOOLKIT_HOME: $ENV{CUDATOOLKIT_HOME}") endif() - # Check for nvcc in PATH find_program(NVCC_EXECUTABLE nvcc) if(NVCC_EXECUTABLE) - message(STATUS " Found nvcc: ${NVCC_EXECUTABLE}") set(CUDA_TOOLKIT_LOADED TRUE) - erf_cray_verbose("nvcc found in PATH") + message(VERBOSE " nvcc: ${NVCC_EXECUTABLE}") endif() - # Warn if CUDA toolkit doesn't appear to be loaded if(NOT CUDA_TOOLKIT_LOADED) - message(WARNING - "\n" - "ERF: CUDA enabled but CUDA toolkit not detected\n" - " Expected environment variables not found:\n" - " - CUDA_HOME\n" - " - CUDATOOLKIT_HOME\n" - " - nvcc in PATH\n" - "\n" - " To fix:\n" - " module load cudatoolkit\n" - " Or on newer systems:\n" - " module load cuda\n" - "\n" - " Build may fail with CUDA-related errors\n" - "") - - erf_cray_verbose("CUDA_HOME = $ENV{CUDA_HOME}") - erf_cray_verbose("CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") - erf_cray_verbose("nvcc search result: ${NVCC_EXECUTABLE}") - endif() - - # Check CUDA architecture is set for GPU builds - if(NOT AMReX_CUDA_ARCH AND NOT DEFINED ENV{AMREX_CUDA_ARCH}) - message(WARNING "") - message(WARNING "ERF: CUDA enabled but GPU architecture not specified") - message(WARNING " Set AMReX_CUDA_ARCH for optimal performance") - message(WARNING " For Perlmutter A100 GPUs:") - message(WARNING " -DAMReX_CUDA_ARCH=8.0") - message(WARNING " Or set in environment:") - message(WARNING " export 
AMREX_CUDA_ARCH=8.0") - message(WARNING "") - - erf_cray_verbose("AMReX_CUDA_ARCH not set (will use CMake default)") - else() - if(AMReX_CUDA_ARCH) - message(STATUS " AMReX_CUDA_ARCH = ${AMReX_CUDA_ARCH}") - else() - message(STATUS " AMREX_CUDA_ARCH = $ENV{AMREX_CUDA_ARCH}") - endif() + message(WARNING "CUDA toolkit not detected") + message(STATUS " Fix: module load cuda") endif() else() - erf_cray_verbose("CUDA not enabled, skipping CUDA toolkit checks") + message(DEBUG "CUDA not enabled, skipping toolkit check") endif() -# ----------------------------------------------------------------------------- # NetCDF Module Check -# ----------------------------------------------------------------------------- - if(ERF_ENABLE_NETCDF) - message(STATUS " Checking for NetCDF...") - - set(NETCDF_LOADED FALSE) + message(VERBOSE "Checking NetCDF") if(DEFINED ENV{NETCDF_DIR}) - message(STATUS " NETCDF_DIR = $ENV{NETCDF_DIR}") - set(NETCDF_LOADED TRUE) - endif() - - if(NOT NETCDF_LOADED) - message(WARNING "") - message(WARNING "ERF: NetCDF enabled but NETCDF_DIR not set") - message(WARNING " To fix:") - message(WARNING " module load cray-netcdf-hdf5parallel") - message(WARNING " Or:") - message(WARNING " module load cray-netcdf") - message(WARNING "") - - erf_cray_verbose("NETCDF_DIR not found in environment") + message(VERBOSE " NETCDF_DIR: $ENV{NETCDF_DIR}") + else() + message(STATUS " NetCDF module not detected") + message(STATUS " Recommended: module load cray-netcdf-hdf5parallel") endif() else() - erf_cray_verbose("NetCDF not enabled, skipping NetCDF checks") + message(DEBUG "NetCDF not enabled") endif() -# ----------------------------------------------------------------------------- -# HDF5 Module Check (for AMReX HDF5 support) -# ----------------------------------------------------------------------------- - +# HDF5 Module Check if(AMReX_HDF5) - message(STATUS " Checking for HDF5...") - - set(HDF5_LOADED FALSE) - + message(VERBOSE "Checking HDF5") + if(DEFINED 
ENV{HDF5_DIR}) - message(STATUS " HDF5_DIR = $ENV{HDF5_DIR}") - set(HDF5_LOADED TRUE) + message(VERBOSE " HDF5_DIR: $ENV{HDF5_DIR}") elseif(DEFINED ENV{HDF5_ROOT}) - message(STATUS " HDF5_ROOT = $ENV{HDF5_ROOT}") - set(HDF5_LOADED TRUE) + message(VERBOSE " HDF5_ROOT: $ENV{HDF5_ROOT}") + else() + message(STATUS " HDF5 module not detected") + message(STATUS " Recommended: module load cray-hdf5-parallel") endif() +else() + message(DEBUG "HDF5 not enabled") +endif() - if(NOT HDF5_LOADED) - message(WARNING "") - message(WARNING "ERF: HDF5 enabled but HDF5_DIR/HDF5_ROOT not set") - message(WARNING " To fix:") - message(WARNING " module load cray-hdf5-parallel") - message(WARNING "") - - erf_cray_verbose("HDF5_DIR/HDF5_ROOT not found in environment") +# FFTW Module Check +if(ERF_ENABLE_FFT) + message(VERBOSE "Checking FFTW") + + if(DEFINED ENV{FFTW_DIR}) + message(VERBOSE " FFTW_DIR: $ENV{FFTW_DIR}") + elseif(DEFINED ENV{CRAY_FFTW_DIR}) + message(VERBOSE " CRAY_FFTW_DIR: $ENV{CRAY_FFTW_DIR}") + else() + message(STATUS " FFTW module not detected") + message(STATUS " Recommended: module load cray-fftw") endif() else() - erf_cray_verbose("HDF5 not enabled, skipping HDF5 checks") + message(DEBUG "FFTW not enabled") endif() -# ----------------------------------------------------------------------------- -# Module Environment Summary -# ----------------------------------------------------------------------------- - -if(ERF_VERBOSE_CRAY_FIXES) - message(STATUS "") - message(STATUS "[VERBOSE] Key environment variables:") - message(STATUS "[VERBOSE] CRAYPE_VERSION = $ENV{CRAYPE_VERSION}") - message(STATUS "[VERBOSE] CRAY_MPICH_DIR = $ENV{CRAY_MPICH_DIR}") - message(STATUS "[VERBOSE] MPICH_DIR = $ENV{MPICH_DIR}") - message(STATUS "[VERBOSE] CUDA_HOME = $ENV{CUDA_HOME}") - message(STATUS "[VERBOSE] CUDATOOLKIT_HOME = $ENV{CUDATOOLKIT_HOME}") - message(STATUS "[VERBOSE] NETCDF_DIR = $ENV{NETCDF_DIR}") - message(STATUS "[VERBOSE] HDF5_DIR = $ENV{HDF5_DIR}") - message(STATUS "[VERBOSE] 
MPICH_GPU_SUPPORT = $ENV{MPICH_GPU_SUPPORT_ENABLED}") - message(STATUS "") +# E3SM Submodule Check +if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) + message(VERBOSE "Checking E3SM submodule") + + set(E3SM_EXPECTED_PATH "${CMAKE_SOURCE_DIR}/external/E3SM") + + if(EXISTS "${E3SM_EXPECTED_PATH}") + file(GLOB E3SM_CONTENTS "${E3SM_EXPECTED_PATH}/*") + if(E3SM_CONTENTS) + message(VERBOSE " E3SM submodule found") + else() + message(WARNING "E3SM directory exists but is empty") + message(STATUS " Fix: git submodule update --init --recursive external/E3SM") + endif() + else() + message(WARNING "E3SM submodule not found") + message(STATUS " Fix: git submodule update --init --recursive external/E3SM") + endif() +else() + message(DEBUG "EKAT physics not enabled, skipping E3SM check") endif() -message(STATUS "") +# Environment summary (DEBUG level) +message(DEBUG "Key environment variables:") +message(DEBUG " CRAYPE_VERSION: $ENV{CRAYPE_VERSION}") +message(DEBUG " CRAY_MPICH_DIR: $ENV{CRAY_MPICH_DIR}") +message(DEBUG " CUDA_HOME: $ENV{CUDA_HOME}") +message(DEBUG " NETCDF_DIR: $ENV{NETCDF_DIR}") +message(DEBUG " HDF5_DIR: $ENV{HDF5_DIR}") +message(DEBUG " FFTW_DIR: $ENV{FFTW_DIR}") +message(DEBUG " MPICH_GPU_SUPPORT_ENABLED: $ENV{MPICH_GPU_SUPPORT_ENABLED}") # ============================================================================== -# Fix 1: CUDA + EKAT -> nvcc_wrapper complications (Checklist Item 1) +# Fix 1: CUDA + EKAT -> nvcc_wrapper complications # ============================================================================== -# PROBLEM: When building with EKAT, we get nvcc_wrapper which can cause -# "mpi.h not found" errors because nvcc_wrapper doesn't know about -# Cray's include paths -# SOLUTION: Add Cray compiler flags to CUDA compilation via --cray-print-opts if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3)) - message(STATUS "ERF: [Fix 1] Applying CUDA+EKAT nvcc_wrapper fix") + message(STATUS "Applying Fix 1: CUDA+EKAT 
nvcc_wrapper") - erf_cray_verbose("Problem: EKAT uses nvcc_wrapper which doesn't inherit Cray paths") - erf_cray_verbose("Condition: ERF_ENABLE_CUDA=ON and (RRTMGP or SHOC or P3 enabled)") - erf_cray_verbose("Solution: Add Cray-specific flags from 'CC --cray-print-opts=cflags'") + message(DEBUG "Problem: nvcc_wrapper doesn't inherit Cray include paths") + message(DEBUG "Solution: Add flags from CC --cray-print-opts=cflags") - # Get Cray-specific flags execute_process( COMMAND ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags OUTPUT_VARIABLE CRAY_CUDA_FLAGS @@ -600,53 +428,36 @@ if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3)) ) if(CRAY_CUDA_FLAGS_RESULT EQUAL 0 AND CRAY_CUDA_FLAGS) - message(STATUS " Adding Cray flags to CUDA compilation") - erf_cray_verbose("Retrieved flags: ${CRAY_CUDA_FLAGS}") - erf_cray_verbose("Command used: ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags") + message(VERBOSE "Adding Cray flags to CMAKE_CUDA_FLAGS") + message(DEBUG "Flags: ${CRAY_CUDA_FLAGS}") if(CMAKE_CUDA_FLAGS) - erf_cray_verbose("Appending to existing CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CRAY_CUDA_FLAGS}" CACHE STRING "" FORCE) else() - erf_cray_verbose("Setting new CMAKE_CUDA_FLAGS") set(CMAKE_CUDA_FLAGS "${CRAY_CUDA_FLAGS}" CACHE STRING "" FORCE) endif() - - erf_cray_verbose("Final CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") else() - message(WARNING "ERF: Could not retrieve Cray CUDA flags") - message(WARNING " Command attempted: ${CMAKE_CXX_COMPILER} --cray-print-opts=cflags") - message(WARNING " Return code: ${CRAY_CUDA_FLAGS_RESULT}") - message(WARNING " You may need to set CMAKE_CUDA_FLAGS manually") - message(WARNING " Example: -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") + message(WARNING "Could not retrieve Cray CUDA flags") + message(STATUS " Try: -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") endif() else() - erf_cray_verbose("Fix 1 not needed (CUDA+EKAT not both 
enabled)") + message(DEBUG "Fix 1 not needed (CUDA+EKAT not both enabled)") endif() # ============================================================================== -# Fix 2: FCOMPARE + Cray -> mpi_gnu_123 not found (Checklist Item 2) +# Fix 2: FCOMPARE + Cray -> mpi_gnu_123 not found # ============================================================================== -# PROBLEM: When building with fcompare, Cray's --as-needed linker flag causes -# the linker to drop MPI libraries it thinks aren't needed, leading to -# "cannot find -lmpi_gnu_123" errors -# SOLUTION: Remove --as-needed from Cray library flags and add --no-as-needed if(ERF_ENABLE_FCOMPARE) - message(STATUS "ERF: [Fix 2] Applying fcompare linker fix") + message(STATUS "Applying Fix 2: fcompare linker") - erf_cray_verbose("Problem: Cray uses --as-needed which drops required MPI libs") - erf_cray_verbose("Condition: ERF_ENABLE_FCOMPARE=ON") - erf_cray_verbose("Solution: Clean Cray lib flags and add --no-as-needed") + message(DEBUG "Problem: --as-needed drops required MPI libs") + message(DEBUG "Solution: Clean Cray libs and add --no-as-needed") - # Get Cray library paths and clean them set(CRAY_LIBS_CLEAN "") - set(COMPILERS_CHECKED "") foreach(COMPILER IN ITEMS ${CMAKE_CXX_COMPILER} ${CMAKE_C_COMPILER} ${CMAKE_Fortran_COMPILER}) if(EXISTS ${COMPILER}) - erf_cray_verbose("Checking compiler: ${COMPILER}") - execute_process( COMMAND ${COMPILER} --cray-print-opts=libs OUTPUT_VARIABLE COMPILER_LIBS @@ -656,108 +467,70 @@ if(ERF_ENABLE_FCOMPARE) ) if(COMPILER_LIBS_RESULT EQUAL 0) - erf_cray_verbose(" Original libs: ${COMPILER_LIBS}") + message(TRACE "Libs from ${COMPILER}: ${COMPILER_LIBS}") - # Remove problematic --as-needed flags + # Remove problematic flags string(REGEX REPLACE "-Wl,--as-needed," "" COMPILER_LIBS "${COMPILER_LIBS}") string(REGEX REPLACE ",--no-as-needed" "" COMPILER_LIBS "${COMPILER_LIBS}") string(REGEX REPLACE ",-l" " -l" COMPILER_LIBS "${COMPILER_LIBS}") - erf_cray_verbose(" Cleaned libs: 
${COMPILER_LIBS}") - set(CRAY_LIBS_CLEAN "${CRAY_LIBS_CLEAN} ${COMPILER_LIBS}") - list(APPEND COMPILERS_CHECKED ${COMPILER}) - else() - erf_cray_verbose(" Failed to get libs from ${COMPILER}") endif() endif() endforeach() if(CRAY_LIBS_CLEAN) - message(STATUS " Adding Cray linker flags: -Wl,--no-as-needed + libs") - erf_cray_verbose("Compilers checked: ${COMPILERS_CHECKED}") - erf_cray_verbose("Combined cleaned libs: ${CRAY_LIBS_CLEAN}") - erf_cray_verbose("Final linker flags: -Wl,--no-as-needed ${CRAY_LIBS_CLEAN}") + message(VERBOSE "Adding: -Wl,--no-as-needed + cleaned libs") + message(DEBUG "Cleaned libs: ${CRAY_LIBS_CLEAN}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed ${CRAY_LIBS_CLEAN}" CACHE STRING "" FORCE) - - erf_cray_verbose("CMAKE_EXE_LINKER_FLAGS updated") else() - message(WARNING "ERF: Could not retrieve Cray library paths") - message(WARNING " Fcompare may fail to link with: cannot find -lmpi_gnu_123") - message(WARNING " Workaround: Set CMAKE_EXE_LINKER_FLAGS manually") - message(WARNING " Example: -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed \$CRAY_LIBS_CLEAN\"") - erf_cray_verbose("No compilers returned valid library flags") + message(WARNING "Could not retrieve Cray library paths") + message(STATUS " Fcompare may fail to link") endif() else() - erf_cray_verbose("Fix 2 not needed (ERF_ENABLE_FCOMPARE=OFF)") + message(DEBUG "Fix 2 not needed (fcompare disabled)") endif() # ============================================================================== -# Fix 3: CUDA without cmake module -> math libs not found (Checklist Item 3) +# Fix 3: CUDA math libs not found # ============================================================================== -# PROBLEM: If 'module load cmake' isn't run, CMAKE_PREFIX_PATH may not include -# CUDA math libraries path, causing link errors for cuBLAS, cuRAND, etc. 
-# SOLUTION: Add $CUDA_HOME/../../math_libs/lib64 to CMAKE_PREFIX_PATH if(ERF_ENABLE_CUDA AND DEFINED ENV{CUDA_HOME}) set(CUDA_MATH_PATH "$ENV{CUDA_HOME}/../../math_libs/lib64") - erf_cray_verbose("Checking for CUDA math libraries...") - erf_cray_verbose("CUDA_HOME = $ENV{CUDA_HOME}") - erf_cray_verbose("Expected math libs path: ${CUDA_MATH_PATH}") + message(DEBUG "Checking CUDA math libs: ${CUDA_MATH_PATH}") if(EXISTS ${CUDA_MATH_PATH}) - message(STATUS "ERF: [Fix 3] Adding CUDA math libraries path") - message(STATUS " ${CUDA_MATH_PATH}") - - erf_cray_verbose("Problem: CUDA math libs may not be in default search path") - erf_cray_verbose("Condition: ERF_ENABLE_CUDA=ON and CUDA_HOME set") - erf_cray_verbose("Solution: Add CUDA_HOME/../../math_libs/lib64 to CMAKE_PREFIX_PATH") - erf_cray_verbose("Path exists: YES") + message(STATUS "Applying Fix 3: CUDA math libraries") + message(VERBOSE "Adding: ${CUDA_MATH_PATH}") list(APPEND CMAKE_PREFIX_PATH ${CUDA_MATH_PATH}) set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} CACHE STRING "" FORCE) - - erf_cray_verbose("CMAKE_PREFIX_PATH updated: ${CMAKE_PREFIX_PATH}") else() - message(WARNING "ERF: CUDA math libs path not found at ${CUDA_MATH_PATH}") - message(WARNING " You may need to 'module load cuda' or set CMAKE_PREFIX_PATH manually") - message(WARNING " Expected libraries: cuBLAS, cuRAND, cuSPARSE, etc.") - erf_cray_verbose("Path exists: NO") - erf_cray_verbose("This may cause link errors for CUDA math libraries") + message(WARNING "CUDA math libs not found at ${CUDA_MATH_PATH}") + message(STATUS " Fix: module load cuda") endif() else() - if(ERF_ENABLE_CUDA AND NOT DEFINED ENV{CUDA_HOME}) - message(WARNING "ERF: CUDA enabled but CUDA_HOME not set") - message(WARNING " Math libraries may not be found") - message(WARNING " Solution: Load CUDA module or set CUDA_HOME") - erf_cray_verbose("CUDA_HOME not set in environment") - else() - erf_cray_verbose("Fix 3 not needed (ERF_ENABLE_CUDA=OFF)") - endif() + message(DEBUG "Fix 3 not 
needed (CUDA disabled or CUDA_HOME not set)") endif() # ============================================================================== -# Fix 4: GPU-aware MPI with Cray GTL (Checklist Item 4) +# Fix 4: GPU-aware MPI with Cray GTL # ============================================================================== -# PROBLEM: GPU-aware MPI on Cray requires linking against mpi_gtl_cuda library -# which enables GPU Transfer Library for direct GPU-GPU communication -# SOLUTION: Detect GPU-aware MPI and add GTL libraries to link flags if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") set(APPLY_FIX4 FALSE) set(GPU_TYPE "") set(GTL_LIB "") + set(MPI_BASE_LIB "") - # Detect which MPI library variant to use - set(MPI_BASE_LIB "") # Will be determined - - # Try 1: Use pkg-config with Cray compiler wrapper path (for Cray systems) + message(VERBOSE "Detecting MPI library for GPU-aware support") + + # Try pkg-config first find_package(PkgConfig QUIET) if(PkgConfig_FOUND) - # On Cray systems, get pkg-config path from compiler wrapper execute_process( COMMAND CC --cray-print-opts=pkg_config_path OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH @@ -767,36 +540,26 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") ) if(CC_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) - erf_cray_verbose("Found PKG_CONFIG_PATH from CC wrapper: ${CRAY_PKG_CONFIG_PATH}") - # Temporarily prepend to PKG_CONFIG_PATH for pkg-config search set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") - else() - erf_cray_verbose("CC wrapper not available or doesn't support --cray-print-opts") + message(TRACE "PKG_CONFIG_PATH: ${CRAY_PKG_CONFIG_PATH}") endif() pkg_check_modules(CRAY_MPI QUIET mpich) if(CRAY_MPI_FOUND) - erf_cray_verbose("pkg-config found mpich") - erf_cray_verbose(" CRAY_MPI_LIBRARIES: ${CRAY_MPI_LIBRARIES}") - erf_cray_verbose(" CRAY_MPI_LINK_LIBRARIES: ${CRAY_MPI_LINK_LIBRARIES}") - - # Extract the base MPI library name from link flags - # Try both LIBRARIES 
and LINK_LIBRARIES + message(DEBUG "Found mpich via pkg-config") foreach(lib IN LISTS CRAY_MPI_LIBRARIES CRAY_MPI_LINK_LIBRARIES) if(lib MATCHES "^mpi_" AND NOT lib MATCHES "mpi_gtl") set(MPI_BASE_LIB "${lib}") - erf_cray_verbose("Detected MPI base lib from pkg-config: ${MPI_BASE_LIB}") + message(DEBUG "Detected MPI base: ${MPI_BASE_LIB}") break() endif() endforeach() - else() - erf_cray_verbose("pkg-config did not find mpich") endif() endif() - # Try 2: Search for library files (fallback) + # Fallback: Search filesystem if(NOT MPI_BASE_LIB) - erf_cray_verbose("Falling back to filesystem search for MPI library") + message(DEBUG "Falling back to filesystem search") set(MPI_LIB_SEARCH_PATHS "") if(DEFINED ENV{MPICH_DIR}) list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") @@ -805,18 +568,14 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") endif() - erf_cray_verbose("Searching for MPI libraries in: ${MPI_LIB_SEARCH_PATHS}") - - # Look for versioned libraries first (more specific) foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) file(GLOB mpi_libs "${path}/libmpi_*.so" "${path}/libmpi_*.a") foreach(lib IN LISTS mpi_libs) get_filename_component(libname "${lib}" NAME_WE) string(REGEX REPLACE "^lib" "" libname "${libname}") - # Prefer mpi_gnu_*, mpi_cray over mpi_gtl_* if(libname MATCHES "^mpi_(gnu|cray|intel)" AND NOT MPI_BASE_LIB) set(MPI_BASE_LIB "${libname}") - erf_cray_verbose("Detected MPI base lib from filesystem: ${MPI_BASE_LIB} at ${lib}") + message(DEBUG "Found MPI lib: ${MPI_BASE_LIB}") break() endif() endforeach() @@ -826,29 +585,19 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") endforeach() endif() - # Try 3: Compiler-based heuristic (last resort) + # Last resort: Heuristic if(NOT MPI_BASE_LIB) - erf_cray_verbose("Falling back to compiler-based heuristic for MPI library") - if(DEFINED ENV{CRAY_MPICH_DIR}) - if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - 
set(MPI_BASE_LIB "mpi_gnu_123") - message(WARNING "ERF: Could not auto-detect MPI library, using heuristic: ${MPI_BASE_LIB}") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") - set(MPI_BASE_LIB "mpi_cray") - message(WARNING "ERF: Could not auto-detect MPI library, using heuristic: ${MPI_BASE_LIB}") - endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(MPI_BASE_LIB "mpi_gnu_123") + message(WARNING "Using heuristic MPI library: ${MPI_BASE_LIB}") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + set(MPI_BASE_LIB "mpi_cray") + message(WARNING "Using heuristic MPI library: ${MPI_BASE_LIB}") else() set(MPI_BASE_LIB "mpi") - erf_cray_verbose("Non-Cray system, using default: ${MPI_BASE_LIB}") endif() endif() - if(MPI_BASE_LIB) - message(STATUS " Using MPI base library: ${MPI_BASE_LIB}") - else() - message(WARNING "ERF: Could not determine MPI base library name!") - endif() - # Determine GPU type and GTL library if(ERF_ENABLE_CUDA) set(APPLY_FIX4 TRUE) @@ -856,164 +605,42 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") set(GTL_LIB "mpi_gtl_cuda") elseif(AMReX_GPU_BACKEND MATCHES "HIP") set(APPLY_FIX4 TRUE) - set(GPU_TYPE "HIP/ROCm") + set(GPU_TYPE "HIP") set(GTL_LIB "mpi_gtl_hsa") endif() if(APPLY_FIX4) - message(STATUS "ERF: [Fix 4] Applying GPU-aware MPI fix (Cray GTL for ${GPU_TYPE})") + message(STATUS "Applying Fix 4: GPU-aware MPI (${GPU_TYPE})") + message(VERBOSE "MPI base library: ${MPI_BASE_LIB}") + message(VERBOSE "GTL library: ${GTL_LIB}") - erf_cray_verbose("Problem: GPU-aware MPI needs Cray GTL libraries") - erf_cray_verbose("Condition: ${GPU_TYPE} + MPI + MPICH_GPU_SUPPORT_ENABLED=1") - erf_cray_verbose("Solution: Add -l${MPI_BASE_LIB} -l${GTL_LIB} to link flags") - erf_cray_verbose("MPICH_GPU_SUPPORT_ENABLED = $ENV{MPICH_GPU_SUPPORT_ENABLED}") - - # Set the MPI+GTL libraries set(CRAY_MPI_LIBS "-l${MPI_BASE_LIB} -l${GTL_LIB}") - - # Try to verify the library exists (for diagnostics) - set(MPI_LIB_SEARCH_PATHS "") - if(DEFINED 
ENV{MPICH_DIR}) - list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{MPICH_DIR}/lib") - endif() - if(DEFINED ENV{CRAY_MPICH_DIR}) - list(APPEND MPI_LIB_SEARCH_PATHS "$ENV{CRAY_MPICH_DIR}/lib") - endif() - - erf_cray_verbose("Searching for ${GTL_LIB} library in:") - foreach(path IN LISTS MPI_LIB_SEARCH_PATHS) - erf_cray_verbose(" ${path}") - endforeach() - - find_library(CRAY_MPI_GTL_LIB - NAMES ${GTL_LIB} - HINTS ${MPI_LIB_SEARCH_PATHS} - NO_DEFAULT_PATH - ) - - if(CRAY_MPI_GTL_LIB) - message(STATUS " Found GTL library: ${CRAY_MPI_GTL_LIB}") - erf_cray_verbose("Library verification successful") - else() - message(STATUS " GTL library not found via find_library (will rely on linker search)") - erf_cray_verbose("Library not found in search paths, but linker may still find it") - erf_cray_verbose("This is normal if libraries are in non-standard Cray locations") - endif() - - # Apply the fix regardless of whether find_library succeeded - # The Cray linker knows where to find these libraries - message(STATUS " Adding MPI+GTL libraries: ${CRAY_MPI_LIBS}") - erf_cray_verbose("Adding to CMAKE_*_STANDARD_LIBRARIES") + message(DEBUG "Adding: ${CRAY_MPI_LIBS}") if(ERF_ENABLE_CUDA) set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" CACHE STRING "" FORCE) - erf_cray_verbose("CMAKE_CUDA_STANDARD_LIBRARIES: ${CMAKE_CUDA_STANDARD_LIBRARIES}") else() set(CMAKE_HIP_STANDARD_LIBRARIES "${CMAKE_HIP_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" CACHE STRING "" FORCE) - erf_cray_verbose("CMAKE_HIP_STANDARD_LIBRARIES: ${CMAKE_HIP_STANDARD_LIBRARIES}") endif() set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" CACHE STRING "" FORCE) - erf_cray_verbose("CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") endif() - else() - if(ERF_ENABLE_MPI AND (ERF_ENABLE_CUDA OR AMReX_GPU_BACKEND MATCHES "HIP")) - if(NOT DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) - message(STATUS "") - message(STATUS " Note: MPICH_GPU_SUPPORT_ENABLED not set") - 
message(STATUS " For GPU-aware MPI, add to your script:") - message(STATUS " export MPICH_GPU_SUPPORT_ENABLED=1") - message(STATUS "") - erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED not set") - elseif(NOT "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") - erf_cray_verbose("Fix 4 not applied: MPICH_GPU_SUPPORT_ENABLED=$ENV{MPICH_GPU_SUPPORT_ENABLED} (not '1')") - endif() - else() - erf_cray_verbose("Fix 4 not needed (GPU+MPI not both enabled)") - endif() + message(DEBUG "Fix 4 not needed (GPU+MPI not enabled or GPU support not enabled)") endif() # ============================================================================== -# Fix 7: HDF5 parallel detection for HIP builds (AMD GPUs) +# Fix 5-6: NetCDF with cray-netcdf-hdf5parallel # ============================================================================== -# PROBLEM: When building with HIP, FindHDF5 may find non-parallel HDF5 or -# detect different HDF5 versions for different languages (C vs HIP) -# SOLUTION: Use pkg-config to get HDF5 info and pre-configure HDF5 hints - -if(AMReX_GPU_BACKEND MATCHES "HIP" AND AMReX_HDF5) - message(STATUS "ERF: [Fix 7] Configuring HDF5 for HIP build") - - erf_cray_verbose("Problem: HIP compiler may find different HDF5 than C compiler") - erf_cray_verbose("Condition: AMReX_GPU_BACKEND=HIP and AMReX_HDF5=ON") - erf_cray_verbose("Solution: Use pkg-config to set HDF5 hints before AMReX configures") - - find_package(PkgConfig QUIET) - if(PkgConfig_FOUND) - # Get pkg-config path from Cray compiler wrapper - execute_process( - COMMAND CC --cray-print-opts=pkg_config_path - OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE - RESULT_VARIABLE CC_RESULT - ) - - if(CC_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) - set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") - erf_cray_verbose("Added Cray pkg-config path for HDF5 detection") - erf_cray_verbose(" PKG_CONFIG_PATH: ${CRAY_PKG_CONFIG_PATH}") - endif() - - # Query 
pkg-config for HDF5 - pkg_check_modules(PC_HDF5 QUIET hdf5) - if(PC_HDF5_FOUND) - message(STATUS " Found HDF5 via pkg-config") - erf_cray_verbose(" HDF5 prefix: ${PC_HDF5_PREFIX}") - erf_cray_verbose(" HDF5 include dirs: ${PC_HDF5_INCLUDE_DIRS}") - erf_cray_verbose(" HDF5 library dirs: ${PC_HDF5_LIBRARY_DIRS}") - - # Set hints for CMake's FindHDF5 (used by AMReX) - set(HDF5_ROOT "${PC_HDF5_PREFIX}" CACHE PATH "HDF5 root from pkg-config") - set(HDF5_PREFER_PARALLEL ON CACHE BOOL "Prefer parallel HDF5") - set(HDF5_IS_PARALLEL TRUE CACHE BOOL "HDF5 is parallel") - - # Help FindHDF5 find the right paths - list(APPEND CMAKE_PREFIX_PATH "${PC_HDF5_PREFIX}") - - message(STATUS " Set HDF5_ROOT = ${PC_HDF5_PREFIX}") - message(STATUS " Set HDF5_PREFER_PARALLEL = ON") - message(STATUS " Set HDF5_IS_PARALLEL = TRUE") - else() - message(WARNING "ERF: pkg-config could not find HDF5") - erf_cray_verbose("pkg-config search for hdf5 failed") - endif() - else() - message(WARNING "ERF: PkgConfig not found, cannot auto-configure HDF5") - endif() - -else() - if(AMReX_HDF5 AND NOT AMReX_GPU_BACKEND MATCHES "HIP") - erf_cray_verbose("Fix 7 not needed (HDF5 enabled but not using HIP backend)") - else() - erf_cray_verbose("Fix 7 not needed (AMReX_HDF5 not enabled)") - endif() -endif() - -# ============================================================================== -# Fix 5-6: NetCDF with cray-netcdf-hdf5parallel (Checklist Items 5-6) -# ============================================================================== -# PROBLEM 5: Cray NetCDF may use different C++ library names or structures -# PROBLEM 6: pkg-config may not find MPI/NetCDF without correct PKG_CONFIG_PATH -# SOLUTION: Set up pkg-config path and add NetCDF/HDF5 directories to search if(ERF_ENABLE_NETCDF) - message(STATUS "ERF: [Fix 5-6] Configuring NetCDF with Cray paths") + message(STATUS "Applying Fix 5-6: NetCDF configuration") + + message(DEBUG "Setting up pkg-config path for NetCDF/MPI") - # Get PKG_CONFIG_PATH directly 
from Cray wrapper execute_process( COMMAND ${CMAKE_CXX_COMPILER} --cray-print-opts=PKG_CONFIG_PATH OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH @@ -1023,177 +650,227 @@ if(ERF_ENABLE_NETCDF) ) if(PKG_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) - message(STATUS " Setting PKG_CONFIG_PATH from Cray wrapper") + message(VERBOSE "PKG_CONFIG_PATH from Cray wrapper") + message(DEBUG " ${CRAY_PKG_CONFIG_PATH}") - # Append to existing PKG_CONFIG_PATH if(DEFINED ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") else() set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}") endif() - - message(STATUS " PKG_CONFIG_PATH = $ENV{PKG_CONFIG_PATH}") endif() - message(STATUS " PKG_CONFIG_PATH = ${PKG_CONFIG_PATH}") - erf_cray_verbose("This allows cmake/gnumake to find MPI and NetCDF via pkg-config") - - # Help find NetCDF (may be named differently on Cray) + # Add NetCDF/HDF5 to search paths if(DEFINED ENV{NETCDF_DIR}) list(APPEND CMAKE_PREFIX_PATH $ENV{NETCDF_DIR}) - message(STATUS " Added NETCDF_DIR to search path: $ENV{NETCDF_DIR}") - erf_cray_verbose("NetCDF headers/libs will be searched in NETCDF_DIR") - else() - erf_cray_verbose("NETCDF_DIR not set (may still work via module)") + message(VERBOSE "Added NETCDF_DIR to search: $ENV{NETCDF_DIR}") endif() if(DEFINED ENV{HDF5_DIR}) list(APPEND CMAKE_PREFIX_PATH $ENV{HDF5_DIR}) - message(STATUS " Added HDF5_DIR to search path: $ENV{HDF5_DIR}") - erf_cray_verbose("HDF5 headers/libs will be searched in HDF5_DIR") - else() - erf_cray_verbose("HDF5_DIR not set (may still work via module)") + message(VERBOSE "Added HDF5_DIR to search: $ENV{HDF5_DIR}") endif() - - erf_cray_verbose("CMAKE_PREFIX_PATH now includes: ${CMAKE_PREFIX_PATH}") else() - if(ERF_ENABLE_NETCDF) - erf_cray_verbose("Fix 5-6 not fully applied: MPICH_DIR not set") - message(WARNING "ERF: NetCDF enabled but MPICH_DIR not set") - message(WARNING " pkg-config may not find MPI libraries") - message(WARNING " Load MPI module or set MPICH_DIR") + 
message(DEBUG "Fix 5-6 not needed (NetCDF disabled)") +endif() + +# ============================================================================== +# Fix 7: HDF5 parallel detection for HIP builds +# ============================================================================== + +if(AMReX_GPU_BACKEND MATCHES "HIP" AND AMReX_HDF5) + message(STATUS "Applying Fix 7: HDF5 for HIP") + + message(DEBUG "Configuring HDF5 hints for HIP build") + + find_package(PkgConfig QUIET) + if(PkgConfig_FOUND) + execute_process( + COMMAND CC --cray-print-opts=pkg_config_path + OUTPUT_VARIABLE CRAY_PKG_CONFIG_PATH + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE CC_RESULT + ) + + if(CC_RESULT EQUAL 0 AND CRAY_PKG_CONFIG_PATH) + set(ENV{PKG_CONFIG_PATH} "${CRAY_PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") + endif() + + pkg_check_modules(PC_HDF5 QUIET hdf5) + if(PC_HDF5_FOUND) + message(VERBOSE "Found HDF5 via pkg-config: ${PC_HDF5_PREFIX}") + + set(HDF5_ROOT "${PC_HDF5_PREFIX}" CACHE PATH "HDF5 root from pkg-config") + set(HDF5_PREFER_PARALLEL ON CACHE BOOL "Prefer parallel HDF5") + set(HDF5_IS_PARALLEL TRUE CACHE BOOL "HDF5 is parallel") + + list(APPEND CMAKE_PREFIX_PATH "${PC_HDF5_PREFIX}") + + message(DEBUG "Set HDF5_ROOT: ${PC_HDF5_PREFIX}") + message(DEBUG "Set HDF5_PREFER_PARALLEL: ON") + else() + message(WARNING "pkg-config could not find HDF5") + endif() else() - erf_cray_verbose("Fix 5-6 not needed (ERF_ENABLE_NETCDF=OFF)") + message(WARNING "PkgConfig not found, cannot auto-configure HDF5") endif() +else() + message(DEBUG "Fix 7 not needed (not HIP+HDF5)") endif() # ============================================================================== # Summary # ============================================================================== -message(STATUS "") -message(STATUS "ERF: Cray system fixes summary") -message(STATUS "══════════════════════════════════════════════════════════════") +message(STATUS "Cray configuration complete") -# Fix 1: CUDA + EKAT +# Track which 
fixes were applied set(FIX1_ACTIVE OFF) -if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3)) +set(FIX2_ACTIVE OFF) +set(FIX3_ACTIVE OFF) +set(FIX4_ACTIVE OFF) +set(FIX56_ACTIVE OFF) +set(FIX7_ACTIVE OFF) + +# Fix 1: CUDA + EKAT +if(ERF_ENABLE_CUDA AND (ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) AND CRAY_CUDA_FLAGS) set(FIX1_ACTIVE ON) endif() -message(STATUS " Fix 1 (CUDA+EKAT): ${FIX1_ACTIVE}") -if(FIX1_ACTIVE AND CRAY_CUDA_FLAGS) - message(STATUS " Applied Cray CUDA flags:") - message(STATUS " ${CRAY_CUDA_FLAGS}") - message(STATUS "") - message(STATUS " Command line equivalent:") - message(STATUS " -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") -endif() # Fix 2: fcompare -message(STATUS "") -message(STATUS " Fix 2 (fcompare): ${ERF_ENABLE_FCOMPARE}") if(ERF_ENABLE_FCOMPARE AND CRAY_LIBS_CLEAN) - message(STATUS " Applied Cray library cleanup:") - message(STATUS " ${CRAY_LIBS_CLEAN}") - message(STATUS "") - message(STATUS " Command line equivalent:") - message(STATUS " CRAY_LIBS=\"\$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')\"") - message(STATUS " CRAY_LIBS=\"\$CRAY_LIBS \$(cc --cray-print-opts=libs | sed ...)\"") - message(STATUS " CRAY_LIBS=\"\$CRAY_LIBS \$(ftn --cray-print-opts=libs | sed ...)\"") - message(STATUS " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed \$CRAY_LIBS\"") - message(STATUS "") - message(STATUS " What was actually set:") - message(STATUS " CMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed ${CRAY_LIBS_CLEAN}\"") + set(FIX2_ACTIVE ON) endif() # Fix 3: CUDA math libs -set(FIX3_ACTIVE OFF) if(ERF_ENABLE_CUDA AND DEFINED ENV{CUDA_HOME}) - set(CUDA_MATH_PATH_CHECK "$ENV{CUDA_HOME}/../../math_libs/lib64") - if(EXISTS ${CUDA_MATH_PATH_CHECK}) + set(CUDA_MATH_CHECK "$ENV{CUDA_HOME}/../../math_libs/lib64") + if(EXISTS ${CUDA_MATH_CHECK}) set(FIX3_ACTIVE ON) endif() endif() -message(STATUS "") -message(STATUS " Fix 3 (CUDA math): ${FIX3_ACTIVE}") 
-if(FIX3_ACTIVE) - message(STATUS " Command line equivalent:") - message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\"") -endif() # Fix 4: GPU-aware MPI -set(FIX4_ACTIVE OFF) -if(ERF_ENABLE_CUDA AND ERF_ENABLE_MPI AND DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED}) - if(CRAY_MPI_GTL_CUDA) - set(FIX4_ACTIVE ON) - endif() -endif() -message(STATUS "") -message(STATUS " Fix 4 (GPU-aware MPI): ${FIX4_ACTIVE}") -if(FIX4_ACTIVE) - message(STATUS " Command line equivalent:") - message(STATUS " export MPICH_GPU_SUPPORT_ENABLED=1") - message(STATUS " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\"") - message(STATUS " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\"") +if(APPLY_FIX4) + set(FIX4_ACTIVE ON) endif() # Fix 5-6: NetCDF -set(FIX56_ACTIVE OFF) -if(ERF_ENABLE_NETCDF AND DEFINED ENV{MPICH_DIR}) +if(ERF_ENABLE_NETCDF) set(FIX56_ACTIVE ON) endif() -message(STATUS "") -message(STATUS " Fix 5-6 (NetCDF): ${FIX56_ACTIVE}") -if(FIX56_ACTIVE) - message(STATUS " Command line equivalent:") - message(STATUS " export PKG_CONFIG_PATH=\"\$MPICH_DIR/lib/pkgconfig:\$PKG_CONFIG_PATH\"") - if(DEFINED ENV{NETCDF_DIR}) - message(STATUS " -DCMAKE_PREFIX_PATH=\"\$NETCDF_DIR\"") - endif() - if(DEFINED ENV{HDF5_DIR}) - message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CMAKE_PREFIX_PATH:\$HDF5_DIR\"") - endif() -endif() # Fix 7: HDF5 for HIP -set(FIX7_ACTIVE OFF) if(AMReX_GPU_BACKEND MATCHES "HIP" AND AMReX_HDF5) set(FIX7_ACTIVE ON) endif() -message(STATUS "") -message(STATUS " Fix 7 (HDF5+HIP): ${FIX7_ACTIVE}") + +# Show summary at VERBOSE level +message(VERBOSE "Applied fixes:") +if(FIX1_ACTIVE) + message(VERBOSE " Fix 1 (CUDA+EKAT): ACTIVE") +endif() +if(FIX2_ACTIVE) + message(VERBOSE " Fix 2 (fcompare): ACTIVE") +endif() +if(FIX3_ACTIVE) + message(VERBOSE " Fix 3 (CUDA math): ACTIVE") +endif() +if(FIX4_ACTIVE) + message(VERBOSE " Fix 4 (GPU-aware MPI): ACTIVE") +endif() +if(FIX56_ACTIVE) + message(VERBOSE " Fix 5-6 (NetCDF): ACTIVE") +endif() 
if(FIX7_ACTIVE) - message(STATUS " Command line equivalent:") - message(STATUS " -DHDF5_ROOT=\$(pkg-config --variable=prefix hdf5)") - message(STATUS " -DHDF5_PREFER_PARALLEL=ON") - message(STATUS " -DHDF5_IS_PARALLEL=TRUE") + message(VERBOSE " Fix 7 (HDF5+HIP): ACTIVE") endif() -message(STATUS "") -message(STATUS "══════════════════════════════════════════════════════════════") -message(STATUS " To disable auto-fixes: -DERF_DISABLE_CRAY_AUTO_FIXES=ON") -message(STATUS " To see verbose output: -DERF_VERBOSE_CRAY_FIXES=ON") -message(STATUS " To override any fix: Set the corresponding CMAKE_* variable explicitly") -message(STATUS "") -message(STATUS " Complete manual equivalent (all active fixes):") -message(STATUS " ------------------------------------------------") +# Command-line equivalents (DEBUG level) +message(DEBUG "Command-line equivalents for active fixes:") +message(DEBUG "=====================================================================") + if(FIX1_ACTIVE) -message(STATUS " -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\" \\") + message(DEBUG "Fix 1 (CUDA+EKAT):") + message(DEBUG " -DCMAKE_CUDA_FLAGS=\"\$(CC --cray-print-opts=cflags)\"") + message(DEBUG "") endif() -if(ERF_ENABLE_FCOMPARE AND CRAY_LIBS_CLEAN) -message(STATUS " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed ${CRAY_LIBS_CLEAN}\" \\") + +if(FIX2_ACTIVE) + message(DEBUG "Fix 2 (fcompare):") + message(DEBUG " CRAY_LIBS=\"\$(CC --cray-print-opts=libs | sed 's/-Wl,--as-needed,//g; s/,--no-as-needed//g; s/,-l/ -l/g')\"") + message(DEBUG " CRAY_LIBS=\"\$CRAY_LIBS \$(cc --cray-print-opts=libs | sed ...)\"") + message(DEBUG " CRAY_LIBS=\"\$CRAY_LIBS \$(ftn --cray-print-opts=libs | sed ...)\"") + message(DEBUG " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed \$CRAY_LIBS\"") + message(DEBUG "") endif() + if(FIX3_ACTIVE) -message(STATUS " -DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\" \\") + message(DEBUG "Fix 3 (CUDA math):") + message(DEBUG " 
-DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\"") + message(DEBUG "") endif() + if(FIX4_ACTIVE) -message(STATUS " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\" \\") -message(STATUS " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-lmpi_gnu_123 -lmpi_gtl_cuda\" \\") + message(DEBUG "Fix 4 (GPU-aware MPI):") + message(DEBUG " export MPICH_GPU_SUPPORT_ENABLED=1") + if(ERF_ENABLE_CUDA) + message(DEBUG " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\"") + else() + message(DEBUG " -DCMAKE_HIP_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\"") + endif() + message(DEBUG " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\"") + message(DEBUG "") endif() -message(STATUS "") -if(ERF_VERBOSE_CRAY_FIXES) - message(STATUS "[VERBOSE] All Cray fixes processing complete") - message(STATUS "[VERBOSE] Review messages above for detailed information") - message(STATUS "[VERBOSE] The command-line equivalents above show what this module does automatically") -endif() \ No newline at end of file +if(FIX56_ACTIVE) + message(DEBUG "Fix 5-6 (NetCDF):") + message(DEBUG " export PKG_CONFIG_PATH=\"\$(CC --cray-print-opts=PKG_CONFIG_PATH):\$PKG_CONFIG_PATH\"") + if(DEFINED ENV{NETCDF_DIR}) + message(DEBUG " -DCMAKE_PREFIX_PATH=\"\$NETCDF_DIR\"") + endif() + if(DEFINED ENV{HDF5_DIR}) + message(DEBUG " -DCMAKE_PREFIX_PATH=\"\$CMAKE_PREFIX_PATH:\$HDF5_DIR\"") + endif() + message(DEBUG "") +endif() + +if(FIX7_ACTIVE) + message(DEBUG "Fix 7 (HDF5+HIP):") + message(DEBUG " -DHDF5_ROOT=\$(pkg-config --variable=prefix hdf5)") + message(DEBUG " -DHDF5_PREFER_PARALLEL=ON") + message(DEBUG " -DHDF5_IS_PARALLEL=TRUE") + message(DEBUG "") +endif() + +if(FIX1_ACTIVE OR FIX2_ACTIVE OR FIX3_ACTIVE OR FIX4_ACTIVE OR FIX56_ACTIVE OR FIX7_ACTIVE) + message(DEBUG "Complete manual equivalent (all active fixes):") + message(DEBUG "=====================================================================") + if(FIX1_ACTIVE) + message(DEBUG " -DCMAKE_CUDA_FLAGS=\"\$(CC 
--cray-print-opts=cflags)\" \\") + endif() + if(FIX2_ACTIVE) + message(DEBUG " -DCMAKE_EXE_LINKER_FLAGS=\"-Wl,--no-as-needed ${CRAY_LIBS_CLEAN}\" \\") + endif() + if(FIX3_ACTIVE) + message(DEBUG " -DCMAKE_PREFIX_PATH=\"\$CUDA_HOME/../../math_libs/lib64\" \\") + endif() + if(FIX4_ACTIVE) + if(ERF_ENABLE_CUDA) + message(DEBUG " -DCMAKE_CUDA_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\" \\") + else() + message(DEBUG " -DCMAKE_HIP_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\" \\") + endif() + message(DEBUG " -DCMAKE_CXX_STANDARD_LIBRARIES=\"-l${MPI_BASE_LIB} -l${GTL_LIB}\" \\") + endif() + message(DEBUG "") +endif() + +message(DEBUG "=====================================================================") +message(DEBUG "To disable auto-fixes: -DERF_DISABLE_CRAY_AUTO_FIXES=ON") +message(DEBUG "For verbose output: cmake --log-level=VERBOSE ..") +message(DEBUG "For debug output: cmake --log-level=DEBUG ..") + +# Pop Cray context +list(POP_BACK CMAKE_MESSAGE_CONTEXT) From dd626043c71221393783db568bb076ff1eaa3655 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Tue, 11 Nov 2025 14:15:01 -0800 Subject: [PATCH 35/44] Add message context to main cmake --- CMakeLists.txt | 166 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 124 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb3cffa882..17729866d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required (VERSION 3.14 FATAL_ERROR) +# Set main ERF context +list(APPEND CMAKE_MESSAGE_CONTEXT "ERF") + # Include Cray compiler detection BEFORE project() to set compilers list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake") include(CrayCompilerDetection) @@ -12,16 +15,22 @@ else() project(ERF CXX C) endif() +message(STATUS "Configuring ERF") +message(VERBOSE "Source directory: ${CMAKE_SOURCE_DIR}") +message(VERBOSE "Build directory: ${CMAKE_BINARY_DIR}") + # Find NVHPC package and create aliases if needed if(ERF_ENABLE_CUDA AND ERF_ENABLE_NVHPC) + message(VERBOSE "Configuring NVHPC CUDA support") + find_package(NVHPC REQUIRED COMPONENTS MATH CUDA) function(create_cuda_alias nvhpc_target cuda_name) if(TARGET NVHPC::${nvhpc_target} AND NOT TARGET CUDA::${cuda_name}) add_library(CUDA::${cuda_name} ALIAS NVHPC::${nvhpc_target}) - message(STATUS " Created alias: CUDA::${cuda_name} -> NVHPC::${nvhpc_target}") + message(STATUS "Created alias: CUDA::${cuda_name} -> NVHPC::${nvhpc_target}") elseif(NOT TARGET NVHPC::${nvhpc_target}) - message(WARNING "X Cannot create alias CUDA::${cuda_name}: NVHPC::${nvhpc_target} not found") - endif() + message(WARNING "Cannot create alias CUDA::${cuda_name}: NVHPC::${nvhpc_target} not found") + endif() endfunction() create_cuda_alias(CUBLAS cublas) create_cuda_alias(CUBLAS_STATIC cublas_static) @@ -31,7 +40,6 @@ if(ERF_ENABLE_CUDA AND ERF_ENABLE_NVHPC) create_cuda_alias(CUSPARSE_STATIC cusparse_static) endif() - list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake") include(CMakePackageConfigHelpers) @@ -41,6 +49,8 @@ include(UtilityTargets) 
########################## OPTIONS ##################################### +message(VERBOSE "Configuring build options") + #General options for all executables in the project set(ERF_DIM "3" CACHE STRING "Number of physical dimensions") option(ERF_ENABLE_DOCUMENTATION "Build documentation" OFF) @@ -91,6 +101,7 @@ endif() # Configure measuring code coverage in tests option(CODECOVERAGE "Enable code coverage profiling" OFF) if(CODECOVERAGE) + message(VERBOSE "Enabling code coverage profiling") # Only supports GNU if(NOT CMAKE_CXX_COMPILER_ID MATCHES GNU) message(WARNING "CODECOVERAGE is only support with GNU Compilers. The current C++ compiler is ${CMAKE_CXX_COMPILER_ID}") @@ -105,7 +116,10 @@ endif() ########################### AMReX ##################################### +list(APPEND CMAKE_MESSAGE_CONTEXT "AMReX") + if (${ERF_USE_INTERNAL_AMREX}) + message(STATUS "Using internal AMReX submodule") set(AMREX_SUBMOD_LOCATION "${CMAKE_SOURCE_DIR}/Submodules/AMReX") include(${CMAKE_SOURCE_DIR}/CMake/SetAmrexOptions.cmake) list(APPEND CMAKE_MODULE_PATH "${AMREX_SUBMOD_LOCATION}/Tools/CMake") @@ -123,7 +137,9 @@ if (${ERF_USE_INTERNAL_AMREX}) set(FCOMPARE_EXE ${CMAKE_BINARY_DIR}/Submodules/AMReX/Tools/Plotfile/amrex_fcompare CACHE STRING "Path to fcompare executable for regression tests") endif() + message(VERBOSE "fcompare executable: ${FCOMPARE_EXE}") else() + message(STATUS "Using external AMReX") set(CMAKE_PREFIX_PATH ${AMREX_DIR} ${CMAKE_PREFIX_PATH}) list(APPEND AMREX_COMPONENTS "3D" "PIC" "PARTICLES" "PDOUBLE" "DOUBLE" "LSOLVERS") @@ -149,6 +165,7 @@ else() list(APPEND AMREX_COMPONENTS "TINY_PROFILE") endif() separate_arguments(AMREX_COMPONENTS) + message(VERBOSE "Required AMReX components: ${AMREX_COMPONENTS}") find_package(AMReX CONFIG REQUIRED COMPONENTS ${AMREX_COMPONENTS}) message(STATUS "Found AMReX = ${AMReX_DIR}") @@ -159,8 +176,11 @@ else() set(FCOMPARE_EXE ${AMReX_DIR}/../../../bin/amrex_fcompare CACHE STRING "Path to fcompare executable for regression 
tests") endif() + message(VERBOSE "fcompare executable: ${FCOMPARE_EXE}") endif() +list(POP_BACK CMAKE_MESSAGE_CONTEXT) + ########################## EKAT ################################## if(ERF_ENABLE_RRTMGP OR ERF_ENABLE_SHOC OR ERF_ENABLE_P3) @@ -170,6 +190,9 @@ else() endif() if(ERF_ENABLE_EKAT) + list(APPEND CMAKE_MESSAGE_CONTEXT "EKAT") + + message(STATUS "Configuring EKAT") if(NOT ERF_ENABLE_MPI) message(FATAL_ERROR "CMake Error: MPI must be enabled if EKAT is enabled.") @@ -177,19 +200,22 @@ if(ERF_ENABLE_EKAT) # NOTE: EKAT provides KOKKOS, so set relevant flags for KOKKOS if(ERF_ENABLE_CUDA) + message(VERBOSE "Enabling Kokkos CUDA support") set(Kokkos_ENABLE_CUDA ON CACHE BOOL "kokkos enable cuda") set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "kokkos enable cuda lambda") set(Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ON CACHE BOOL "kokkos enable cuda RDC") endif() if(ERF_ENABLE_HIP) + message(VERBOSE "Enabling Kokkos HIP support") set(Kokkos_ENABLE_HIP ON CACHE BOOL "kokkos enable hip") set(Kokkos_ENABLE_HIP_LAMBDA ON CACHE BOOL "kokkos enable hip lambda") set(Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE ON CACHE BOOL "kokkos enable hip RDC") endif() if(ERF_ENABLE_SYCL) - set(Kokkos_ENABLE_SYCL ON CACHE BOOL "kokkos enable hip") + message(VERBOSE "Enabling Kokkos SYCL support") + set(Kokkos_ENABLE_SYCL ON CACHE BOOL "kokkos enable sycl") set(Kokkos_ENABLE_SYCL_LAMBDA ON CACHE BOOL "kokkos enable sycl lambda") endif() @@ -199,12 +225,20 @@ if(ERF_ENABLE_EKAT) set(EKAT_ENABLE_KOKKOS ON CACHE BOOL "ekat enable kokkos") set(EKAT_ENABLE_LOGGING ON CACHE BOOL "ekat enable spdlog") set(EKAT_BIN ${CMAKE_BINARY_DIR}/Submodules/ekat) + + message(VERBOSE "EKAT binary directory: ${EKAT_BIN}") add_subdirectory(${CMAKE_SOURCE_DIR}/Submodules/ekat ${EKAT_BIN}) + list(POP_BACK CMAKE_MESSAGE_CONTEXT) endif() + ########################### MPI ##################################### if(ERF_ENABLE_MPI) + list(APPEND CMAKE_MESSAGE_CONTEXT "MPI") + + message(STATUS "Configuring 
MPI") + # Check if we're on Cray with bare MPI wrappers (which will hang) set(SKIP_MPI_DETECTION FALSE) @@ -213,14 +247,14 @@ if(ERF_ENABLE_MPI) if(CMAKE_CXX_COMPILER MATCHES "mpicxx" OR CMAKE_C_COMPILER MATCHES "mpicc" OR CMAKE_Fortran_COMPILER MATCHES "mpifort") - message(STATUS "Detected bare MPI wrappers on Cray - skipping MPI detection (would hang)") + message(STATUS "Detected bare MPI wrappers on Cray - skipping detection (would hang)") set(SKIP_MPI_DETECTION TRUE) endif() endif() if(SKIP_MPI_DETECTION) # Workaround: Manual MPI setup (avoids hang) - message(STATUS "Manually configuring MPI (bypassing find_package)...") + message(VERBOSE "Manually configuring MPI (bypassing find_package)...") # Get Cray MPICH version for informational purposes set(MPICH_VERSION "UNKNOWN") @@ -236,10 +270,12 @@ if(ERF_ENABLE_MPI) # Create MPI targets if(NOT TARGET MPI::MPI_CXX) add_library(MPI::MPI_CXX INTERFACE IMPORTED) + message(DEBUG "Created MPI::MPI_CXX target") endif() if(NOT TARGET MPI::MPI_C) add_library(MPI::MPI_C INTERFACE IMPORTED) + message(DEBUG "Created MPI::MPI_C target") endif() # Set MPI variables @@ -249,84 +285,114 @@ if(ERF_ENABLE_MPI) set(MPI_C_VERSION "${MPI_VERSION}") set(MPI_CXX_VERSION "${MPI_VERSION}") - message(STATUS " Cray MPICH implementation: ${MPICH_VERSION}") - message(STATUS " MPI API standard: ${MPI_VERSION}") - message(STATUS " Created MPI::MPI_CXX and MPI::MPI_C targets") + message(STATUS "Cray MPICH: ${MPICH_VERSION}") + message(STATUS "MPI standard: ${MPI_VERSION}") + message(VERBOSE "Created MPI::MPI_CXX and MPI::MPI_C targets") else() # Normal path: Use find_package - message(STATUS "Using find_package(MPI) for detection...") + message(VERBOSE "Using find_package(MPI) for detection") set(_mpi_comps C CXX) if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) list(APPEND _mpi_comps Fortran) endif() + message(DEBUG "MPI components: ${_mpi_comps}") find_package(MPI REQUIRED ${_mpi_comps}) endif() -endif() -message(STATUS "MPI section complete") + 
message(STATUS "MPI configuration complete") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) +endif() ########################## NETCDF ################################## -message(STATUS "DEBUG: ERF_ENABLE_NETCDF = ${ERF_ENABLE_NETCDF}") -message(STATUS "DEBUG: ERF_ENABLE_TOOLS = ${ERF_ENABLE_TOOLS}") -message(STATUS "DEBUG: About to check NetCDF section...") if(ERF_ENABLE_NETCDF OR ERF_ENABLE_TOOLS) + list(APPEND CMAKE_MESSAGE_CONTEXT "NetCDF") + + message(STATUS "Configuring NetCDF") + message(DEBUG "ERF_ENABLE_NETCDF: ${ERF_ENABLE_NETCDF}") + message(DEBUG "ERF_ENABLE_TOOLS: ${ERF_ENABLE_TOOLS}") + set(CMAKE_PREFIX_PATH ${NETCDF_DIR} ${CMAKE_PREFIX_PATH}) -## set(NETCDF_CXX "YES") find_package (NetCDF REQUIRED) if(NETCDF_FOUND) - message(STATUS "Found NetCDF, NETCDF_DIR = ${NETCDF_DIR}") + message(STATUS "Found NetCDF: ${NETCDF_DIR}") + message(VERBOSE "NetCDF includes: ${NETCDF_INCLUDES}") + message(VERBOSE "NetCDF libraries: ${NETCDF_LIBRARIES}") endif() -endif() -message(STATUS "DEBUG: NetCDF section complete") + message(STATUS "NetCDF configuration complete") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) +endif() ########################## NOAH-MP ################################## if(ERF_ENABLE_NOAHMP) + list(APPEND CMAKE_MESSAGE_CONTEXT "Noah-MP") + + message(STATUS "Configuring Noah-MP") + if(ERF_ENABLE_NETCDF) set(NOAHMP_HOME ${CMAKE_SOURCE_DIR}/Submodules/Noah-MP/drivers/erf) set(NOAHMP_BIN ${CMAKE_BINARY_DIR}/Submodules/Noah-MP/drivers/erf) + message(VERBOSE "Noah-MP source: ${NOAHMP_HOME}") + message(VERBOSE "Noah-MP binary: ${NOAHMP_BIN}") add_subdirectory(${NOAHMP_HOME} ${NOAHMP_BIN}) else() message(FATAL_ERROR "Noah-MP requires NetCDF be enabled") endif() -message(STATUS "DEBUG: NOAH-MP section complete") + + message(STATUS "Noah-MP configuration complete") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) endif() ########################### RRTMGP ################################# if(ERF_ENABLE_RRTMGP) - if(NOT ERF_ENABLE_EKAT) - message(FATAL_ERROR "CMake Error: EKAT must 
be enabled if RRTMGP is enabled.") - endif() + list(APPEND CMAKE_MESSAGE_CONTEXT "RRTMGP") - if(NOT ERF_ENABLE_NETCDF) - message(FATAL_ERROR "CMake Error: NetCDF must be enabled if RRTMGP is enabled.") - endif() + message(STATUS "Configuring RRTMGP") + + if(NOT ERF_ENABLE_EKAT) + message(FATAL_ERROR "EKAT must be enabled if RRTMGP is enabled") + endif() - message(STATUS "Building RRTMGP + KOKKOS...") + if(NOT ERF_ENABLE_NETCDF) + message(FATAL_ERROR "NetCDF must be enabled if RRTMGP is enabled") + endif() + + message(VERBOSE "Building RRTMGP with Kokkos support") + + # Build the static rrtmgp library + set(RRTMGP_BIN ${CMAKE_BINARY_DIR}/Submodules/rrtmgp) + message(VERBOSE "RRTMGP binary directory: ${RRTMGP_BIN}") + add_subdirectory(${CMAKE_SOURCE_DIR}/Submodules/RRTMGP/cpp ${RRTMGP_BIN}) - # Build the static rrtmgp library - set(RRTMGP_BIN ${CMAKE_BINARY_DIR}/Submodules/rrtmgp) - add_subdirectory(${CMAKE_SOURCE_DIR}/Submodules/RRTMGP/cpp ${RRTMGP_BIN}) + # Set up kokkos library and definitions + set(RRTMGP_ENABLE_KOKKOS TRUE) + target_compile_definitions(rrtmgp PUBLIC RRTMGP_ENABLE_KOKKOS) + target_link_libraries(rrtmgp kokkos) + message(DEBUG "Added Kokkos to RRTMGP target") - # Set up kokkos library and definitions - set(RRTMGP_ENABLE_KOKKOS TRUE) - target_compile_definitions(rrtmgp PUBLIC RRTMGP_ENABLE_KOKKOS) - target_link_libraries(rrtmgp kokkos) -message(STATUS "DEBUG: RRTMGP section complete") + message(STATUS "RRTMGP configuration complete") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) endif() ########################### SHOC ################################# if(ERF_ENABLE_SHOC) - if(NOT ERF_ENABLE_EKAT) - message(FATAL_ERROR "CMake Error: EKAT must be enabled if SHOC is enabled.") - endif() + list(APPEND CMAKE_MESSAGE_CONTEXT "SHOC") + + message(STATUS "Configuring SHOC") - # NOTE: We compile shoc src files directly -message(STATUS "DEBUG: SHOC section complete") + if(NOT ERF_ENABLE_EKAT) + message(FATAL_ERROR "EKAT must be enabled if SHOC is enabled") + 
endif() + + # NOTE: We compile shoc src files directly + message(VERBOSE "SHOC source files will be compiled directly") + + message(STATUS "SHOC configuration complete") + list(POP_BACK CMAKE_MESSAGE_CONTEXT) endif() ########################### ERF ##################################### @@ -345,15 +411,21 @@ include(${CMAKE_SOURCE_DIR}/CMake/SetRpath.cmake) add_subdirectory(Exec) if(ERF_ENABLE_TESTS) + message(STATUS "Configuring tests") include(CTest) add_subdirectory(Tests) endif() if(ERF_ENABLE_DOCUMENTATION) - add_subdirectory(Docs) + message(STATUS "Configuring documentation") + add_subdirectory(Docs) endif() # Installation rules +list(APPEND CMAKE_MESSAGE_CONTEXT "Install") + +message(STATUS "Configuring installation") + include(CMakePackageConfigHelpers) include(GNUInstallDirs) @@ -406,3 +478,13 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} ) + +message(VERBOSE "Install prefix: ${CMAKE_INSTALL_PREFIX}") +message(VERBOSE "Install libdir: ${CMAKE_INSTALL_LIBDIR}") + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) + +# Pop main ERF context +list(POP_BACK CMAKE_MESSAGE_CONTEXT) + +message(STATUS "ERF configuration complete") From 4229a494ee498cb57a7c5f19ffe93e889f918de0 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Tue, 11 Nov 2025 17:24:37 -0500 Subject: [PATCH 36/44] Test log-context and fix nesting --- Build/cmake_with_kokkos_many_noradiation_hip.sh | 1 + CMake/FindNetCDF.cmake | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Build/cmake_with_kokkos_many_noradiation_hip.sh b/Build/cmake_with_kokkos_many_noradiation_hip.sh index a2fd160a93..839713de50 100755 --- a/Build/cmake_with_kokkos_many_noradiation_hip.sh +++ b/Build/cmake_with_kokkos_many_noradiation_hip.sh @@ -19,6 +19,7 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ -DERF_ENABLE_FCOMPARE:BOOL=ON \ -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + --log-context \ -B build_erf .. cmake --build build_erf -j10 -v diff --git a/CMake/FindNetCDF.cmake b/CMake/FindNetCDF.cmake index 4168a5a956..cd5430e6cb 100644 --- a/CMake/FindNetCDF.cmake +++ b/CMake/FindNetCDF.cmake @@ -119,6 +119,9 @@ find_library(NETCDF_LIBRARIES_C NAMES netcdf mark_as_advanced(NETCDF_LIBRARIES_C) if(NETCDF_LIBRARIES_C) + message(VERBOSE "Found libnetcdf: ${NETCDF_LIBRARIES_C}") + list(APPEND NETCDF_DETECTION_LOG "find_library: ${NETCDF_LIBRARIES_C}") + # Only add HDF5 if pkg-config told us NetCDF needs it if(NETCDF_LINK_LIBRARIES) # Check if pkg-config's library list includes hdf5 @@ -136,14 +139,15 @@ if(NETCDF_LIBRARIES_C) message(STATUS " NETCDF_LIBRARIES_C = ${NETCDF_LINK_LIBRARIES}") endif() else() - message(STATUS "NetCDF has no link libraries (potentially was built without HDF5 support)") + message(STATUS "NetCDF has no HDF5 dependency in pkg-config") endif() - message(VERBOSE "Found libnetcdf: ${NETCDF_LIBRARIES_C}") - list(APPEND NETCDF_DETECTION_LOG "find_library: ${NETCDF_LIBRARIES_C}") + endif() # <-- THIS WAS MISSING! 
+ # FALLBACK: If find_library failed but pkg-config succeeded, use pkg-config's library list elseif(NETCDF_FOUND AND NETCDF_LINK_LIBRARIES) set(NETCDF_LIBRARIES_C ${NETCDF_LINK_LIBRARIES}) message(STATUS "Using NetCDF libraries from pkg-config: ${NETCDF_LINK_LIBRARIES}") + list(APPEND NETCDF_DETECTION_LOG "pkg-config fallback: ${NETCDF_LINK_LIBRARIES}") else() message(DEBUG "libnetcdf not found") list(APPEND NETCDF_DETECTION_LOG "find_library: failed") From 2f6f0763fbd048438665c381c08e4f562ca1b080 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 14:43:16 -0800 Subject: [PATCH 37/44] Move new warning --- CMake/FindNetCDF.cmake | 51 +++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/CMake/FindNetCDF.cmake b/CMake/FindNetCDF.cmake index cd5430e6cb..be76f7ef5d 100644 --- a/CMake/FindNetCDF.cmake +++ b/CMake/FindNetCDF.cmake @@ -47,14 +47,14 @@ else() list(APPEND NETCDF_DETECTION_LOG "NETCDF_DIR not set") endif() -#if(DEFINED ENV{NETCDF_DIR}) -# list(APPEND NETCDF_INCLUDE_HINTS $ENV{NETCDF_DIR}/include) -# list(APPEND NETCDF_LIBRARY_HINTS $ENV{NETCDF_DIR}/lib) -# message(VERBOSE "Using ENV NETCDF_DIR: $ENV{NETCDF_DIR}") -# list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR=$ENV{NETCDF_DIR}") -#else() -# list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR not set") -#endif() +if(DEFINED ENV{NETCDF_DIR}) + list(APPEND NETCDF_INCLUDE_HINTS $ENV{NETCDF_DIR}/include) + list(APPEND NETCDF_LIBRARY_HINTS $ENV{NETCDF_DIR}/lib) + message(VERBOSE "Using ENV NETCDF_DIR: $ENV{NETCDF_DIR}") + list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR=$ENV{NETCDF_DIR}") +else() + list(APPEND NETCDF_DETECTION_LOG "ENV NETCDF_DIR not set") +endif() if(NETCDF_INCLUDE_DIR) list(APPEND NETCDF_INCLUDE_HINTS ${NETCDF_INCLUDE_DIR}) @@ -201,6 +201,41 @@ set(NETCDF_LIBRARIES "${NetCDF_libs}" CACHE STRING "All NetCDF libraries require set(NETCDF_LINK_LIBRARIES ${NetCDF_libs}) set(NETCDF_INCLUDE_DIRS ${NETCDF_INCLUDES}) +# Check if 
detection failed - show helpful error BEFORE standard handler +if(NOT NETCDF_LIBRARIES_C OR NOT NETCDF_INCLUDES) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "NetCDF Detection Failed") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "Detection attempts:") + foreach(attempt ${NETCDF_DETECTION_LOG}) + message(STATUS " ${attempt}") + endforeach() + message(STATUS "") + message(STATUS "Missing components:") + message(STATUS " netcdf.h: ${NETCDF_INCLUDES}") + message(STATUS " libnetcdf: ${NETCDF_LIBRARIES_C}") + message(STATUS "") + message(STATUS "To resolve:") + message(STATUS "") + message(STATUS " On Perlmutter/NERSC:") + message(STATUS " module load cray-netcdf-hdf5parallel") + message(STATUS "") + message(STATUS " On other Cray systems:") + message(STATUS " module load cray-netcdf") + message(STATUS "") + message(STATUS " Or specify manually:") + message(STATUS " cmake -DNETCDF_DIR=/path/to/netcdf ..") + message(STATUS "") + message(STATUS " Or via environment:") + message(STATUS " export NETCDF_DIR=/path/to/netcdf") + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "") + message(FATAL_ERROR "NetCDF not found") +endif() + # Standard find package handling include(FindPackageHandleStandardArgs) find_package_handle_standard_args (NetCDF DEFAULT_MSG NETCDF_LIBRARIES NETCDF_LINK_LIBRARIES NETCDF_INCLUDE_DIRS NETCDF_INCLUDES NetCDF_has_interfaces) From 2c4712d00da5fe0f7fc91f06687926b098f3a545 Mon Sep 17 00:00:00 2001 From: Mahesh Natarajan Date: Tue, 11 Nov 2025 15:10:40 -0800 Subject: [PATCH 38/44] Correcting hurricane intensification output (#2707) Co-authored-by: Mahesh Natarajan --- Source/Utils/ERF_HurricaneDiagnostics.H | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Utils/ERF_HurricaneDiagnostics.H 
b/Source/Utils/ERF_HurricaneDiagnostics.H index 61ed21b47a..4337cdd750 100644 --- a/Source/Utils/ERF_HurricaneDiagnostics.H +++ b/Source/Utils/ERF_HurricaneDiagnostics.H @@ -326,7 +326,6 @@ HurricaneMaxVelTracker(const amrex::Geometry& geom, amrex::Real h_val_max_global = -1.0e30; #ifdef AMREX_USE_MPI MPI_Allreduce(&h_val_max_local, &h_val_max_global, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - h_val_max_global = h_val_max_local; #else h_val_max_global = h_val_max_local; #endif From 1186823c3e53b741d802651df2d5c100428fdb87 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Tue, 11 Nov 2025 15:45:07 -0800 Subject: [PATCH 39/44] Add more verbose errors and warnings --- Build/cmake_with_kokkos_many_cuda.sh | 1 + CMake/CrayCompilerDetection.cmake | 151 ++++++++++++++++++++++----- CMakeLists.txt | 108 ++++++++++++++++--- 3 files changed, 223 insertions(+), 37 deletions(-) diff --git a/Build/cmake_with_kokkos_many_cuda.sh b/Build/cmake_with_kokkos_many_cuda.sh index bc7cf23345..f13cf79ee9 100755 --- a/Build/cmake_with_kokkos_many_cuda.sh +++ b/Build/cmake_with_kokkos_many_cuda.sh @@ -19,6 +19,7 @@ cmake -DCMAKE_INSTALL_PREFIX:PATH=./install_erf \ -DERF_ENABLE_FCOMPARE:BOOL=ON \ -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + --log-context \ -B build_erf .. 
cmake --build build_erf -j10 -v diff --git a/CMake/CrayCompilerDetection.cmake b/CMake/CrayCompilerDetection.cmake index 28b7096cd2..61da7cfd0d 100644 --- a/CMake/CrayCompilerDetection.cmake +++ b/CMake/CrayCompilerDetection.cmake @@ -5,6 +5,37 @@ # The main CrayDetection.cmake runs AFTER project() to apply build fixes # ============================================================================== +# ----------------------------------------------------------------------------- +# Helper function: Suggest machine profile +# ----------------------------------------------------------------------------- +function(erf_suggest_machine_profile) + execute_process( + COMMAND hostname -s + OUTPUT_VARIABLE hostname + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + + file(GLOB profiles "${CMAKE_SOURCE_DIR}/Build/machines/*_erf.profile") + + message(STATUS " Load modules from your machine profile:") + message(STATUS "") + + if(profiles) + foreach(p ${profiles}) + get_filename_component(name ${p} NAME_WE) + # Check if hostname contains profile name + if(hostname MATCHES "${name}") + message(STATUS " source ${p} <-- matches hostname '${hostname}'") + else() + message(STATUS " source ${p}") + endif() + endforeach() + else() + message(STATUS " No profiles found in ${CMAKE_SOURCE_DIR}/Build/machines/") + endif() +endfunction() + # Skip if user already set compilers explicitly if(CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER OR CMAKE_Fortran_COMPILER) message(STATUS "ERF: Compilers already specified by user, skipping Cray auto-detection") @@ -81,33 +112,70 @@ message(STATUS "") # GPU Host Compilers (for CUDA, HIP, SYCL) # ----------------------------------------------------------------------------- -# CUDA Host Compiler - detect via environment +# ----------------------------------------------------------------------------- +# GPU Host Compilers (for CUDA, HIP, SYCL) +# ----------------------------------------------------------------------------- + +# CUDA - Check if craype-accel 
module is loaded on Cray systems if(DEFINED ENV{CUDA_HOME} OR DEFINED ENV{CUDATOOLKIT_HOME} OR DEFINED ENV{CRAY_ACCEL_TARGET}) - message(STATUS " Detected CUDA environment, configuring CUDA host compiler...") + message(STATUS " Detected CUDA environment") - # Only set if not already specified by user - if(NOT CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX}) - if(ERF_CRAY_CXX) - set(CMAKE_CUDA_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "CUDA host compiler" FORCE) - message(STATUS " Set CMAKE_CUDA_HOST_COMPILER = ${ERF_CRAY_CXX}") + # On Cray systems, need craype-accel-* module loaded + if(DEFINED ENV{CRAYPE_VERSION}) + if(NOT DEFINED ENV{CRAY_ACCEL_TARGET}) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "CUDA on Cray: Missing craype-accel Module") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "The Cray compiler wrappers need a craype-accel-* module loaded") + message(STATUS "to configure GPU support (sets CRAY_ACCEL_TARGET).") + message(STATUS "") + message(STATUS "To fix, load the appropriate module from your machine profile:") + message(STATUS "") + erf_suggest_machine_profile() + message(STATUS "") + message(STATUS " Examples of craype-accel modules:") + message(STATUS " craype-accel-nvidia80 (A100)") + message(STATUS " craype-accel-nvidia90 (H100)") + message(STATUS " craype-accel-amd-gfx90a (MI250X)") + message(STATUS "") + message(STATUS "====================================================================") + message(FATAL_ERROR "CUDA requires craype-accel module on Cray systems") + else() + message(STATUS " craype-accel module loaded: CRAY_ACCEL_TARGET=$ENV{CRAY_ACCEL_TARGET}") + message(STATUS " Cray wrappers will handle CUDA compilation") endif() - else() - message(STATUS " CUDA host compiler already set by user") endif() endif() -# HIP Host Compiler - detect via ROCM environment +# 
HIP - Check if craype-accel module is loaded on Cray systems if(DEFINED ENV{ROCM_PATH} OR DEFINED ENV{HIP_PATH}) - message(STATUS " Detected ROCm/HIP environment, configuring HIP host compiler...") + message(STATUS " Detected ROCm/HIP environment") - # Only set if not already specified by user - if(NOT CMAKE_HIP_HOST_COMPILER AND NOT DEFINED ENV{HIPHOSTCXX}) - if(ERF_CRAY_CXX) - set(CMAKE_HIP_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "HIP host compiler" FORCE) - message(STATUS " Set CMAKE_HIP_HOST_COMPILER = ${ERF_CRAY_CXX}") + if(DEFINED ENV{CRAYPE_VERSION}) + if(NOT DEFINED ENV{CRAY_ACCEL_TARGET}) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "HIP on Cray: Missing craype-accel Module") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "The Cray compiler wrappers need a craype-accel-* module loaded") + message(STATUS "to configure GPU support (sets CRAY_ACCEL_TARGET).") + message(STATUS "") + message(STATUS "To fix, load the appropriate module from your machine profile:") + message(STATUS "") + erf_suggest_machine_profile() + message(STATUS "") + message(STATUS " Examples of craype-accel modules:") + message(STATUS " craype-accel-amd-gfx90a (MI250X)") + message(STATUS " craype-accel-amd-gfx942 (MI300)") + message(STATUS "") + message(STATUS "====================================================================") + message(FATAL_ERROR "HIP requires craype-accel module on Cray systems") + else() + message(STATUS " craype-accel module loaded: CRAY_ACCEL_TARGET=$ENV{CRAY_ACCEL_TARGET}") endif() - else() - message(STATUS " HIP host compiler already set by user") endif() endif() @@ -177,14 +245,44 @@ endif() message(STATUS "ERF: Setting minimal flags for compiler tests...") if(DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED} AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") - message(STATUS " GPU-aware MPI detected, 
adding CUDA runtime for tests") + message(STATUS " GPU-aware MPI detected") message(STATUS " Detected libraries: ${GTL_LIBS}") - # APPEND to linker flags - if(CMAKE_EXE_LINKER_FLAGS) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lcudart -lcuda" CACHE STRING "" FORCE) - else() - set(CMAKE_EXE_LINKER_FLAGS "-lcudart -lcuda" CACHE STRING "" FORCE) + # Only add CUDA runtime if CUDA is actually available + set(NEED_CUDA_RUNTIME FALSE) + if(DEFINED ENV{CRAY_ACCEL_TARGET}) + if("$ENV{CRAY_ACCEL_TARGET}" MATCHES "nvidia") + set(NEED_CUDA_RUNTIME TRUE) + endif() + endif() + + if(NEED_CUDA_RUNTIME) + # Check if CUDA toolkit is available + if(DEFINED ENV{CUDA_HOME} OR DEFINED ENV{CUDATOOLKIT_HOME}) + message(STATUS " Adding CUDA runtime libraries for GPU-aware MPI tests") + + # APPEND to linker flags + if(CMAKE_EXE_LINKER_FLAGS) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lcudart -lcuda" CACHE STRING "" FORCE) + else() + set(CMAKE_EXE_LINKER_FLAGS "-lcudart -lcuda" CACHE STRING "" FORCE) + endif() + else() + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "GPU-Aware MPI: CUDA Runtime Not Found") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "GPU-aware MPI is enabled (MPICH_GPU_SUPPORT_ENABLED=1) but") + message(STATUS "CUDA toolkit is not loaded.") + message(STATUS "") + message(STATUS "To fix, load the appropriate modules from your machine profile:") + message(STATUS "") + erf_suggest_machine_profile() + message(STATUS "") + message(STATUS "====================================================================") + message(FATAL_ERROR "GPU-aware MPI requires CUDA toolkit") + endif() endif() # APPEND to standard libraries (use DETECTED GTL_LIBS, not hardcoded!) 
@@ -199,4 +297,9 @@ if(DEFINED ENV{MPICH_GPU_SUPPORT_ENABLED} AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" else() set(CMAKE_CUDA_STANDARD_LIBRARIES "${GTL_LIBS}" CACHE STRING "" FORCE) endif() + + message(STATUS " CMAKE_EXE_LINKER_FLAGS: ${CMAKE_EXE_LINKER_FLAGS}") + message(STATUS " CMAKE_CXX_STANDARD_LIBRARIES: ${CMAKE_CXX_STANDARD_LIBRARIES}") +else() + message(STATUS " GPU-aware MPI not enabled") endif() \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 17729866d3..a95120bceb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,25 +19,51 @@ message(STATUS "Configuring ERF") message(VERBOSE "Source directory: ${CMAKE_SOURCE_DIR}") message(VERBOSE "Build directory: ${CMAKE_BINARY_DIR}") +# Find NVHPC package and create aliases if needed # Find NVHPC package and create aliases if needed if(ERF_ENABLE_CUDA AND ERF_ENABLE_NVHPC) - message(VERBOSE "Configuring NVHPC CUDA support") - - find_package(NVHPC REQUIRED COMPONENTS MATH CUDA) + list(APPEND CMAKE_MESSAGE_CONTEXT "NVHPC") + message(STATUS "Configuring NVHPC CUDA support") + + find_package(NVHPC QUIET COMPONENTS MATH CUDA) + + if(NOT NVHPC_FOUND) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "NVHPC Detection Failed") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "To resolve:") + message(STATUS " Load NVHPC module or set NVHPC_ROOT") + message(STATUS "") + message(STATUS " Example:") + message(STATUS " module load nvhpc") + message(STATUS " cmake -DNVHPC_ROOT=/path/to/nvhpc ..") + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "") + message(FATAL_ERROR "NVHPC required but not found") + endif() + + message(STATUS "Found NVHPC") + function(create_cuda_alias nvhpc_target cuda_name) if(TARGET NVHPC::${nvhpc_target} AND NOT TARGET CUDA::${cuda_name}) 
add_library(CUDA::${cuda_name} ALIAS NVHPC::${nvhpc_target}) - message(STATUS "Created alias: CUDA::${cuda_name} -> NVHPC::${nvhpc_target}") + message(STATUS " Created alias: CUDA::${cuda_name} -> NVHPC::${nvhpc_target}") elseif(NOT TARGET NVHPC::${nvhpc_target}) - message(WARNING "Cannot create alias CUDA::${cuda_name}: NVHPC::${nvhpc_target} not found") + message(WARNING " Cannot create alias CUDA::${cuda_name}: NVHPC::${nvhpc_target} not found") endif() endfunction() + create_cuda_alias(CUBLAS cublas) create_cuda_alias(CUBLAS_STATIC cublas_static) create_cuda_alias(CURAND curand) create_cuda_alias(CURAND_STATIC curand_static) create_cuda_alias(CUSPARSE cusparse) create_cuda_alias(CUSPARSE_STATIC cusparse_static) + + list(POP_BACK CMAKE_MESSAGE_CONTEXT) endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake") @@ -140,6 +166,7 @@ if (${ERF_USE_INTERNAL_AMREX}) message(VERBOSE "fcompare executable: ${FCOMPARE_EXE}") else() message(STATUS "Using external AMReX") + set(CMAKE_PREFIX_PATH ${AMREX_DIR} ${CMAKE_PREFIX_PATH}) list(APPEND AMREX_COMPONENTS "3D" "PIC" "PARTICLES" "PDOUBLE" "DOUBLE" "LSOLVERS") @@ -164,11 +191,36 @@ else() if (ERF_ENABLE_TINY_PROFILE) list(APPEND AMREX_COMPONENTS "TINY_PROFILE") endif() + separate_arguments(AMREX_COMPONENTS) message(VERBOSE "Required AMReX components: ${AMREX_COMPONENTS}") - find_package(AMReX CONFIG REQUIRED - COMPONENTS ${AMREX_COMPONENTS}) - message(STATUS "Found AMReX = ${AMReX_DIR}") + + find_package(AMReX CONFIG QUIET COMPONENTS ${AMREX_COMPONENTS}) + + if(NOT AMReX_FOUND) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "AMReX Detection Failed") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "Required components: ${AMREX_COMPONENTS}") + message(STATUS "") + message(STATUS "To resolve:") + message(STATUS "") + message(STATUS " Option 1: Use internal AMReX 
(recommended):") + message(STATUS " cmake -DERF_USE_INTERNAL_AMREX=ON ..") + message(STATUS "") + message(STATUS " Option 2: Build and install AMReX separately, then:") + message(STATUS " cmake -DAMReX_DIR=/path/to/amrex/lib/cmake/AMReX ..") + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "") + message(FATAL_ERROR "AMReX required but not found") + endif() + + message(STATUS "Found AMReX: ${AMReX_DIR}") + message(VERBOSE " AMReX_VERSION: ${AMReX_VERSION}") + if(WIN32) set(FCOMPARE_EXE ${AMReX_DIR}/../../../*/amrex_fcompare.exe CACHE STRING "Path to fcompare executable for regression tests") @@ -177,6 +229,7 @@ else() CACHE STRING "Path to fcompare executable for regression tests") endif() message(VERBOSE "fcompare executable: ${FCOMPARE_EXE}") + endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) @@ -225,7 +278,7 @@ if(ERF_ENABLE_EKAT) set(EKAT_ENABLE_KOKKOS ON CACHE BOOL "ekat enable kokkos") set(EKAT_ENABLE_LOGGING ON CACHE BOOL "ekat enable spdlog") set(EKAT_BIN ${CMAKE_BINARY_DIR}/Submodules/ekat) - + message(VERBOSE "EKAT binary directory: ${EKAT_BIN}") add_subdirectory(${CMAKE_SOURCE_DIR}/Submodules/ekat ${EKAT_BIN}) @@ -234,9 +287,10 @@ endif() ########################### MPI ##################################### +########################### MPI ##################################### + if(ERF_ENABLE_MPI) list(APPEND CMAKE_MESSAGE_CONTEXT "MPI") - message(STATUS "Configuring MPI") # Check if we're on Cray with bare MPI wrappers (which will hang) @@ -254,7 +308,7 @@ if(ERF_ENABLE_MPI) if(SKIP_MPI_DETECTION) # Workaround: Manual MPI setup (avoids hang) - message(VERBOSE "Manually configuring MPI (bypassing find_package)...") + message(VERBOSE "Manually configuring MPI (bypassing find_package)") # Get Cray MPICH version for informational purposes set(MPICH_VERSION "UNKNOWN") @@ -289,18 +343,46 @@ if(ERF_ENABLE_MPI) message(STATUS "MPI standard: ${MPI_VERSION}") message(VERBOSE "Created 
MPI::MPI_CXX and MPI::MPI_C targets") else() - # Normal path: Use find_package + # Normal path: Use find_package with QUIET message(VERBOSE "Using find_package(MPI) for detection") set(_mpi_comps C CXX) if(ERF_ENABLE_MORR_FORT OR ERF_ENABLE_NOAHMP) list(APPEND _mpi_comps Fortran) endif() message(DEBUG "MPI components: ${_mpi_comps}") - find_package(MPI REQUIRED ${_mpi_comps}) + + find_package(MPI QUIET COMPONENTS ${_mpi_comps}) + + if(NOT MPI_FOUND) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "MPI Detection Failed") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "To resolve, load modules from your machine profile:") + message(STATUS "") + erf_suggest_machine_profile() + message(STATUS "") + message(STATUS " Or on non-Cray systems, install MPI:") + message(STATUS " OpenMPI, MPICH, Intel MPI, etc.") + message(STATUS "") + message(STATUS " Then configure with:") + message(STATUS " cmake -DMPI_C_COMPILER=mpicc -DMPI_CXX_COMPILER=mpicxx ..") + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "") + message(FATAL_ERROR "MPI required but not found") + endif() + + message(STATUS "Found MPI") + message(VERBOSE " MPI_C_VERSION: ${MPI_C_VERSION}") + message(VERBOSE " MPI_CXX_VERSION: ${MPI_CXX_VERSION}") endif() message(STATUS "MPI configuration complete") list(POP_BACK CMAKE_MESSAGE_CONTEXT) +else() + message(DEBUG "MPI not enabled") endif() ########################## NETCDF ################################## From a6da818af23f55bf1556391fa2b8a66d53665139 Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Wed, 12 Nov 2025 05:45:25 -0800 Subject: [PATCH 40/44] Make a config file, replace accidently removed host --- CMake/CrayCompilerDetection.cmake | 61 ++++++++++++-- CMake/CrayDetection.cmake | 136 ++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 6 deletions(-) diff --git a/CMake/CrayCompilerDetection.cmake b/CMake/CrayCompilerDetection.cmake index 61da7cfd0d..39b3a9a382 100644 --- a/CMake/CrayCompilerDetection.cmake +++ b/CMake/CrayCompilerDetection.cmake @@ -112,14 +112,10 @@ message(STATUS "") # GPU Host Compilers (for CUDA, HIP, SYCL) # ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# GPU Host Compilers (for CUDA, HIP, SYCL) -# ----------------------------------------------------------------------------- - # CUDA - Check if craype-accel module is loaded on Cray systems if(DEFINED ENV{CUDA_HOME} OR DEFINED ENV{CUDATOOLKIT_HOME} OR DEFINED ENV{CRAY_ACCEL_TARGET}) message(STATUS " Detected CUDA environment") - + # On Cray systems, need craype-accel-* module loaded if(DEFINED ENV{CRAYPE_VERSION}) if(NOT DEFINED ENV{CRAY_ACCEL_TARGET}) @@ -147,12 +143,39 @@ if(DEFINED ENV{CUDA_HOME} OR DEFINED ENV{CUDATOOLKIT_HOME} OR DEFINED ENV{CRAY_A message(STATUS " Cray wrappers will handle CUDA compilation") endif() endif() + + # Set CUDA compiler (default to Cray wrapper, can be overridden) + # Respect: CMAKE_CUDA_COMPILER (cache), CUDACXX (env) +# if(NOT CMAKE_CUDA_COMPILER AND NOT DEFINED ENV{CUDACXX}) +# if(ERF_CRAY_CXX) +# set(CMAKE_CUDA_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "CUDA compiler (Cray wrapper)") +# message(STATUS " Set CMAKE_CUDA_COMPILER = ${ERF_CRAY_CXX}") +# message(STATUS " -> Inherits MPI paths automatically (no Fix 1 needed)") +# endif() +# elseif(CMAKE_CUDA_COMPILER) +# message(STATUS " CMAKE_CUDA_COMPILER already set: ${CMAKE_CUDA_COMPILER}") +# elseif(DEFINED ENV{CUDACXX}) +# message(STATUS " CUDACXX 
environment variable set: $ENV{CUDACXX}") +# endif() + + # Set CUDA host compiler (used when nvcc or nvcc_wrapper is the CUDA compiler) + # Respect: CMAKE_CUDA_HOST_COMPILER (cache), CUDAHOSTCXX (env) + if(NOT CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX}) + if(ERF_CRAY_CXX) + set(CMAKE_CUDA_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "CUDA host compiler") + message(STATUS " Set CMAKE_CUDA_HOST_COMPILER = ${ERF_CRAY_CXX}") + endif() + elseif(CMAKE_CUDA_HOST_COMPILER) + message(STATUS " CMAKE_CUDA_HOST_COMPILER already set: ${CMAKE_CUDA_HOST_COMPILER}") + elseif(DEFINED ENV{CUDAHOSTCXX}) + message(STATUS " CUDAHOSTCXX environment variable set: $ENV{CUDAHOSTCXX}") + endif() endif() # HIP - Check if craype-accel module is loaded on Cray systems if(DEFINED ENV{ROCM_PATH} OR DEFINED ENV{HIP_PATH}) message(STATUS " Detected ROCm/HIP environment") - + if(DEFINED ENV{CRAYPE_VERSION}) if(NOT DEFINED ENV{CRAY_ACCEL_TARGET}) message(STATUS "") @@ -177,6 +200,32 @@ if(DEFINED ENV{ROCM_PATH} OR DEFINED ENV{HIP_PATH}) message(STATUS " craype-accel module loaded: CRAY_ACCEL_TARGET=$ENV{CRAY_ACCEL_TARGET}") endif() endif() + + # Set HIP compiler (Cray wrapper handles HIP via hipcc) + # Respect: CMAKE_HIP_COMPILER (cache), HIPCXX (env) +# if(NOT CMAKE_HIP_COMPILER AND NOT DEFINED ENV{HIPCXX}) +# if(ERF_CRAY_CXX) +# set(CMAKE_HIP_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "HIP compiler (Cray wrapper)") +# message(STATUS " Set CMAKE_HIP_COMPILER = ${ERF_CRAY_CXX}") +# endif() +# elseif(CMAKE_HIP_COMPILER) +# message(STATUS " CMAKE_HIP_COMPILER already set: ${CMAKE_HIP_COMPILER}") +# elseif(DEFINED ENV{HIPCXX}) +# message(STATUS " HIPCXX environment variable set: $ENV{HIPCXX}") +# endif() + + # Set HIP host compiler + # Respect: CMAKE_HIP_HOST_COMPILER (cache), HIPHOSTCXX (env) + if(NOT CMAKE_HIP_HOST_COMPILER AND NOT DEFINED ENV{HIPHOSTCXX}) + if(ERF_CRAY_CXX) + set(CMAKE_HIP_HOST_COMPILER "${ERF_CRAY_CXX}" CACHE FILEPATH "HIP host compiler") + message(STATUS " Set 
CMAKE_HIP_HOST_COMPILER = ${ERF_CRAY_CXX}") + endif() + elseif(CMAKE_HIP_HOST_COMPILER) + message(STATUS " CMAKE_HIP_HOST_COMPILER already set: ${CMAKE_HIP_HOST_COMPILER}") + elseif(DEFINED ENV{HIPHOSTCXX}) + message(STATUS " HIPHOSTCXX environment variable set: $ENV{HIPHOSTCXX}") + endif() endif() # SYCL - detect via Intel oneAPI diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 8299749e8f..561b120c6e 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -867,6 +867,142 @@ if(FIX1_ACTIVE OR FIX2_ACTIVE OR FIX3_ACTIVE OR FIX4_ACTIVE OR FIX56_ACTIVE OR F message(DEBUG "") endif() +# At the end of CrayDetection.cmake, after all fixes: + +# ============================================================================== +# Generate Concise Config File +# ============================================================================== + +set(CRAY_CONFIG_FILE "${CMAKE_BINARY_DIR}/cray_detected_config.cmake") + +file(WRITE ${CRAY_CONFIG_FILE} +"# ============================================================================== +# Auto-detected Cray Configuration +# Generated: ${CMAKE_CURRENT_LIST_FILE} +# Date: 2025-11-12 +# ============================================================================== +# This file shows the settings auto-detected by CrayDetection.cmake +# You can use this as a starting point for a manual config file. +# +# To use manually: +# cmake -C cray_detected_config.cmake .. 
+# ============================================================================== + +") + +# System info +file(APPEND ${CRAY_CONFIG_FILE} " +# System Detection +set(ERF_ON_CRAY TRUE CACHE BOOL \"Detected Cray system\") +set(CRAYPE_VERSION \"$ENV{CRAYPE_VERSION}\" CACHE STRING \"Cray PE version\") +") + +# Compiler info +file(APPEND ${CRAY_CONFIG_FILE} " +# Compiler Configuration +set(CMAKE_C_COMPILER \"${CMAKE_C_COMPILER}\" CACHE FILEPATH \"\") +set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"\") +set(CMAKE_CXX_COMPILER_ID \"${CMAKE_CXX_COMPILER_ID}\" CACHE STRING \"\") +set(CMAKE_CXX_COMPILER_VERSION \"${CMAKE_CXX_COMPILER_VERSION}\" CACHE STRING \"\") +") + +# GPU architectures +if(ERF_ENABLE_CUDA AND AMReX_CUDA_ARCH) + file(APPEND ${CRAY_CONFIG_FILE} " +# CUDA Configuration +set(AMReX_CUDA_ARCH \"${AMReX_CUDA_ARCH}\" CACHE STRING \"Auto-detected\") +") +endif() + +if(AMReX_AMD_ARCH) + file(APPEND ${CRAY_CONFIG_FILE} " +# HIP Configuration +set(AMReX_AMD_ARCH \"${AMReX_AMD_ARCH}\" CACHE STRING \"Auto-detected\") +") +endif() + +if(KOKKOS_ARCH_SET) + file(APPEND ${CRAY_CONFIG_FILE} " +# Kokkos Architecture +") + foreach(arch IN ITEMS VOLTA70 AMPERE80 HOPPER90 VEGA90A VEGA908 MI300A) + if(Kokkos_ARCH_${arch}) + file(APPEND ${CRAY_CONFIG_FILE} "set(Kokkos_ARCH_${arch} ON CACHE BOOL \"Auto-detected\")\n") + endif() + endforeach() +endif() + +# Applied fixes +file(APPEND ${CRAY_CONFIG_FILE} " +# Applied Fixes +") + +if(FIX1_ACTIVE) + file(APPEND ${CRAY_CONFIG_FILE} " +# Fix 1: CUDA+EKAT nvcc_wrapper flags +set(CMAKE_CUDA_FLAGS \"${CMAKE_CUDA_FLAGS}\" CACHE STRING \"\") +") +endif() + +if(FIX2_ACTIVE) + file(APPEND ${CRAY_CONFIG_FILE} " +# Fix 2: fcompare linker flags +set(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\" CACHE STRING \"\") +") +endif() + +if(FIX3_ACTIVE) + file(APPEND ${CRAY_CONFIG_FILE} " +# Fix 3: CUDA math libraries path +list(APPEND CMAKE_PREFIX_PATH \"${CUDA_MATH_PATH}\") +") +endif() + +if(FIX4_ACTIVE) + file(APPEND 
${CRAY_CONFIG_FILE} " +# Fix 4: GPU-aware MPI (${GPU_TYPE}) +set(CMAKE_CXX_STANDARD_LIBRARIES \"${CMAKE_CXX_STANDARD_LIBRARIES}\" CACHE STRING \"\") +") + if(ERF_ENABLE_CUDA) + file(APPEND ${CRAY_CONFIG_FILE} "set(CMAKE_CUDA_STANDARD_LIBRARIES \"${CMAKE_CUDA_STANDARD_LIBRARIES}\" CACHE STRING \"\")\n") + endif() +endif() + +if(FIX56_ACTIVE) + file(APPEND ${CRAY_CONFIG_FILE} " +# Fix 5-6: NetCDF/HDF5 paths +set(ENV{PKG_CONFIG_PATH} \"$ENV{PKG_CONFIG_PATH}\") +") + if(DEFINED ENV{NETCDF_DIR}) + file(APPEND ${CRAY_CONFIG_FILE} "list(APPEND CMAKE_PREFIX_PATH \"$ENV{NETCDF_DIR}\")\n") + endif() +endif() + +if(FIX7_ACTIVE) + file(APPEND ${CRAY_CONFIG_FILE} " +# Fix 7: HDF5 parallel for HIP +set(HDF5_ROOT \"${HDF5_ROOT}\" CACHE PATH \"\") +set(HDF5_PREFER_PARALLEL ON CACHE BOOL \"\") +set(HDF5_IS_PARALLEL TRUE CACHE BOOL \"\") +") +endif() + +message(STATUS "Generated config: ${CRAY_CONFIG_FILE}") + +# Add a target to display it +add_custom_target(show-cray-config + COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" + COMMAND ${CMAKE_COMMAND} -E echo "Auto-detected Cray Configuration:" + COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" + COMMAND ${CMAKE_COMMAND} -E cat ${CRAY_CONFIG_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "To use this config manually:" + COMMAND ${CMAKE_COMMAND} -E echo " cmake -C ${CRAY_CONFIG_FILE} .." 
+ COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" + COMMENT "Displaying auto-detected Cray configuration" + VERBATIM +) + message(DEBUG "=====================================================================") message(DEBUG "To disable auto-fixes: -DERF_DISABLE_CRAY_AUTO_FIXES=ON") message(DEBUG "For verbose output: cmake --log-level=VERBOSE ..") From 048daf32f9c83bde8e25eeea615b719c790794a1 Mon Sep 17 00:00:00 2001 From: Laren Spear Date: Wed, 12 Nov 2025 16:42:05 -0600 Subject: [PATCH 41/44] MSVC with MS-MPI + Downloadable Binary (#2709) * NetCDF/RRTGMP/Particles CI * Remove commented out Spack commands Removed commented-out lines for spack view commands. * windows with ms-mpi attempt 1 * windows with ms-mpi attempt 1 * windows with ms-mpi attempt 2 * add option for MPI in cmake * fix test path * fix indentation * fix paths * Ctest with powershell? * Ctest with powershell 2 * Simplifying installing MS-MPI + more * Change shell and MPI executable path * MPI wrapper script * More MPI test changes (hopeful) * MS-MPI test * MS-MPI from correct source * MS-MPI from correct source 2 * MSI vs EXE * Exe nonewwindow * Remove exe * Remove exe 2 * Compile only, not run * Binary artifact * Binary artifact 2 * Whole build directory Updated artifact upload paths to include the entire build directory. * Modify Windows MPI workflow and enhance README Updated job configuration for Windows MPI workflow to support both OFF and ON variants for MPI. Added detailed installation instructions and troubleshooting steps in the README. * Style fixes * Remove non-MPI builds * Put regular windows.yml back to prior state --------- Co-authored-by: Aaron M. 
Lattanzi <103702284+AMLattanzi@users.noreply.github.com> --- .github/workflows/windows-mpi.yml | 163 ++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 .github/workflows/windows-mpi.yml diff --git a/.github/workflows/windows-mpi.yml b/.github/workflows/windows-mpi.yml new file mode 100644 index 0000000000..cdb98b42b5 --- /dev/null +++ b/.github/workflows/windows-mpi.yml @@ -0,0 +1,163 @@ +name: Windows + +on: [push, pull_request] + +concurrency: + group: ${{ github.ref }}-${{ github.head_ref }}-windows + cancel-in-progress: true + +jobs: + WIN64-MSVC: + name: WIN64-MSVC - MPI ${{ matrix.mpi }} - Particles ${{ matrix.particles }} + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + particles: [OFF, ON] + mpi: [ON] + + steps: + - name: Checkout (with submodules) + uses: actions/checkout@v4 + with: + submodules: true + + - name: Install MS-MPI SDK only + shell: pwsh + run: | + curl -L -o msmpisdk.msi https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi + msiexec /i msmpisdk.msi /qn /norestart + + $msmpiInc = 'C:\Program Files (x86)\Microsoft SDKs\MPI\Include' + $msmpiLib = 'C:\Program Files (x86)\Microsoft SDKs\MPI\Lib\x64' + "MSMPI_INC=$msmpiInc" | Out-File -FilePath $env:GITHUB_ENV -Append + "MSMPI_LIB=$msmpiLib" | Out-File -FilePath $env:GITHUB_ENV -Append + + - name: Configure (CMake) + shell: pwsh + run: | + cmake -S . 
-B build ` + -DCMAKE_BUILD_TYPE=Release ` + -DERF_ENABLE_MPI=ON ` + -DMPI_CXX_LIB_NAMES=msmpi ` + -DMPI_C_LIB_NAMES=msmpi ` + -DMPI_msmpi_LIBRARY="$env:MSMPI_LIB\msmpi.lib" ` + ${{ github.workspace }} + + - name: Build + shell: pwsh + run: cmake --build build --parallel 2 --verbose + + - name: Create Installation README + shell: pwsh + run: | + @" + # ERF Windows Build - Installation Instructions + + ## Build Information + - Build Type: Release + - MPI Enabled: ${{ matrix.mpi }} + - Particles Enabled: ${{ matrix.particles }} + - MS-MPI Version: 10.1.1 + - Built on: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss UTC") + - Commit: ${{ github.sha }} + + ## Required Runtime Installation + + This executable requires MS-MPI Runtime v10.1.1 to run. + + ### Quick Install (PowerShell): + ``````powershell + # Download and install MS-MPI Runtime v10.1.1 + Invoke-WebRequest -Uri "https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe" -OutFile "msmpisetup.exe" + .\msmpisetup.exe + + # After installation, open a NEW PowerShell window to refresh PATH + `````` + + ### Alternative Install (using vcpkg): + ``````powershell + # vcpkg will download the installer and prompt you to run it + vcpkg install msmpi:x64-windows + # Follow the instructions vcpkg provides to run the downloaded installer + `````` + + ## Running the Executable + + Navigate to the executable directory (e.g., `Exec\ABL\Debug\` or `Exec\ABL\Release\`) and run: + + ``````powershell + # Single process + mpiexec -n 1 .\erf_abl.exe path\to\inputs_file + + # Multiple processes (e.g., 4) + mpiexec -n 4 .\erf_abl.exe path\to\inputs_file + `````` + + **Important:** Always use `mpiexec` or `mpirun` to launch. Running `.\erf_abl.exe` directly will trigger Windows Defender firewall warnings. + + ## Troubleshooting + + ### Executable runs but produces no output: + - MS-MPI runtime is not installed, or the wrong version is installed. 
+ - Install MS-MPI v10.1.1 runtime using the script above + - Open a NEW terminal after installation + + ### "mpiexec not recognized": + - Open a new PowerShell window after installing MS-MPI + - Or manually refresh PATH: `$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine")` + + ## Build Details + + This build was compiled with: + - Compiler: MSVC 19.29+ + - CMake Configuration: + - CMAKE_BUILD_TYPE=Release + - ERF_ENABLE_MPI=ON + - ERF_ENABLE_PARTICLES=${{ matrix.particles }} + "@ | Out-File -FilePath build\INSTALL.txt -Encoding utf8 + + - name: Add Installation Instructions to Job Summary + shell: pwsh + run: | + @" + ## ✅ Build Complete: ERF Windows (MPI: ${{ matrix.mpi }}, Particles: ${{ matrix.particles }}) + + ### 📦 Artifact Information + - **Build Type:** Release + - **MS-MPI Version:** 10.1.1 + - **Commit:** ${{ github.sha }} + + ### ⚠️ Required for Running + + This executable requires **MS-MPI Runtime v10.1.1** + + #### Quick Install: + ``````powershell + Invoke-WebRequest -Uri "https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe" -OutFile "msmpisetup.exe" + .\msmpisetup.exe + `````` + + #### Or use vcpkg: + ``````powershell + vcpkg install msmpi:x64-windows + `````` + + ### 🚀 Running the Executable + + ``````powershell + mpiexec -n 4 .\erf_abl.exe path\to\inputs_file + `````` + + **Note:** Always use `mpiexec` to launch (not `.\erf_abl.exe` directly) to avoid Windows Defender warnings. + + See `INSTALL.txt` in the artifact for complete instructions. + "@ | Out-File -FilePath $env:GITHUB_STEP_SUMMARY -Append -Encoding utf8 + + - name: Upload artifact (from build tree) + uses: actions/upload-artifact@v4 + with: + name: ERF-win64-Release-mpi-${{ matrix.mpi }}-particles-${{ matrix.particles }} + path: | + build + if-no-files-found: warn From 46d94bb618929a1a12d15ff68963e0fd8e36a84f Mon Sep 17 00:00:00 2001 From: "Jean M. 
Sexton" Date: Fri, 14 Nov 2025 07:23:37 -0800 Subject: [PATCH 42/44] More cmake module checking --- CMake/CrayDetection.cmake | 195 ++++++++++++++++++++++++++++++++++--- CMake/UtilityTargets.cmake | 95 +++++++++--------- 2 files changed, 224 insertions(+), 66 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index 561b120c6e..ae99899ec1 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -66,6 +66,77 @@ if(NOT ERF_ON_CRAY) return() endif() +# ============================================================================== +# Optional: Check for Stale Configuration +# ============================================================================== + +option(ERF_CHECK_MODULES "Check for stale configuration from module changes" ON) + +if(ERF_CHECK_MODULES) + list(APPEND CMAKE_MESSAGE_CONTEXT "CrayConfigCheck") + + message(DEBUG "Starting configuration verification") + + # Detection log for issues + set(STALE_CONFIG_LOG "") + + # Check 1: Module environment changed + if(DEFINED ENV{LOADEDMODULES}) + set(CURRENT_MODULES "$ENV{LOADEDMODULES}") + if(DEFINED CACHED_LOADED_MODULES) + if(NOT "${CURRENT_MODULES}" STREQUAL "${CACHED_LOADED_MODULES}") + message(VERBOSE "Module environment changed since last configure") + list(APPEND STALE_CONFIG_LOG "LOADEDMODULES changed") + list(APPEND STALE_CONFIG_LOG " Previous: ${CACHED_LOADED_MODULES}") + list(APPEND STALE_CONFIG_LOG " Current: ${CURRENT_MODULES}") + else() + message(DEBUG "Module environment unchanged") + endif() + else() + message(DEBUG "First configure - caching module environment") + endif() + set(CACHED_LOADED_MODULES "${CURRENT_MODULES}" CACHE INTERNAL "Modules at configure time") + endif() + + # Check 2: PE_ENV changed + if(DEFINED ENV{PE_ENV}) + set(CURRENT_PE_ENV "$ENV{PE_ENV}") + if(DEFINED CACHED_PE_ENV AND NOT "${CURRENT_PE_ENV}" STREQUAL "${CACHED_PE_ENV}") + message(VERBOSE "PE_ENV changed: ${CACHED_PE_ENV} -> ${CURRENT_PE_ENV}") + list(APPEND STALE_CONFIG_LOG 
"PE_ENV changed from ${CACHED_PE_ENV} to ${CURRENT_PE_ENV}") + endif() + set(CACHED_PE_ENV "${CURRENT_PE_ENV}" CACHE INTERNAL "") + endif() + + # Check 3: Compiler version changed + if(DEFINED CMAKE_CXX_COMPILER_VERSION) + if(DEFINED CACHED_CXX_COMPILER_VERSION AND NOT "${CMAKE_CXX_COMPILER_VERSION}" STREQUAL "${CACHED_CXX_COMPILER_VERSION}") + message(VERBOSE "Compiler version changed") + list(APPEND STALE_CONFIG_LOG "Compiler version changed from ${CACHED_CXX_COMPILER_VERSION} to ${CMAKE_CXX_COMPILER_VERSION}") + endif() + set(CACHED_CXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}" CACHE INTERNAL "") + endif() + + # Check 4: CMAKE_*_STANDARD_LIBRARIES already contains MPI (from previous run) + if(DEFINED CMAKE_CXX_STANDARD_LIBRARIES AND CMAKE_CXX_STANDARD_LIBRARIES) + if(CMAKE_CXX_STANDARD_LIBRARIES MATCHES "mpi_") + message(VERBOSE "CMAKE_CXX_STANDARD_LIBRARIES already contains MPI libraries") + list(APPEND STALE_CONFIG_LOG "CMAKE_CXX_STANDARD_LIBRARIES pre-populated with MPI libs") + list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CXX_STANDARD_LIBRARIES}") + endif() + endif() + + if(DEFINED CMAKE_CUDA_STANDARD_LIBRARIES AND CMAKE_CUDA_STANDARD_LIBRARIES) + if(CMAKE_CUDA_STANDARD_LIBRARIES MATCHES "mpi_") + message(VERBOSE "CMAKE_CUDA_STANDARD_LIBRARIES already contains MPI libraries") + list(APPEND STALE_CONFIG_LOG "CMAKE_CUDA_STANDARD_LIBRARIES pre-populated with MPI libs") + list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + endif() + endif() + + list(POP_BACK CMAKE_MESSAGE_CONTEXT) +endif() + # ============================================================================== # Compiler Version Checks # ============================================================================== @@ -598,6 +669,34 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") endif() endif() + # Verify MPI library exists (if checking enabled) + if(ERF_CHECK_MODULES AND MPI_BASE_LIB) + message(DEBUG "Verifying MPI library: ${MPI_BASE_LIB}") + 
+ find_library(MPI_BASE_VERIFY + NAMES ${MPI_BASE_LIB} + PATHS + $ENV{MPICH_DIR}/lib + $ENV{CRAY_MPICH_DIR}/lib + NO_DEFAULT_PATH + ) + + if(MPI_BASE_VERIFY) + message(DEBUG "Verified MPI library exists: ${MPI_BASE_VERIFY}") + else() + message(VERBOSE "MPI library ${MPI_BASE_LIB} not found") + list(APPEND STALE_CONFIG_LOG "MPI library lib${MPI_BASE_LIB}.so not found") + list(APPEND STALE_CONFIG_LOG " Searched in: \$MPICH_DIR/lib, \$CRAY_MPICH_DIR/lib") + + # Try to suggest correct version + if(CMAKE_CXX_COMPILER_VERSION MATCHES "^([0-9]+)\\.([0-9]+)") + set(EXPECTED_VER "${CMAKE_MATCH_1}${CMAKE_MATCH_2}") + list(APPEND STALE_CONFIG_LOG " Expected based on GCC ${CMAKE_CXX_COMPILER_VERSION}: mpi_gnu_${EXPECTED_VER}") + endif() + endif() + unset(MPI_BASE_VERIFY CACHE) + endif() + # Determine GPU type and GTL library if(ERF_ENABLE_CUDA) set(APPLY_FIX4 TRUE) @@ -609,24 +708,42 @@ if(ERF_ENABLE_MPI AND "$ENV{MPICH_GPU_SUPPORT_ENABLED}" STREQUAL "1") set(GTL_LIB "mpi_gtl_hsa") endif() - if(APPLY_FIX4) +if(APPLY_FIX4) message(STATUS "Applying Fix 4: GPU-aware MPI (${GPU_TYPE})") message(VERBOSE "MPI base library: ${MPI_BASE_LIB}") message(VERBOSE "GTL library: ${GTL_LIB}") set(CRAY_MPI_LIBS "-l${MPI_BASE_LIB} -l${GTL_LIB}") - message(DEBUG "Adding: ${CRAY_MPI_LIBS}") + # Only append if not already present if(ERF_ENABLE_CUDA) - set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) + string(FIND "${CMAKE_CUDA_STANDARD_LIBRARIES}" "${CRAY_MPI_LIBS}" already_present) + if(already_present EQUAL -1) + message(DEBUG "Adding to CMAKE_CUDA_STANDARD_LIBRARIES: ${CRAY_MPI_LIBS}") + set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + else() + message(DEBUG "CUDA libraries already contain MPI libs, skipping") + endif() else() - set(CMAKE_HIP_STANDARD_LIBRARIES "${CMAKE_HIP_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) + string(FIND 
"${CMAKE_HIP_STANDARD_LIBRARIES}" "${CRAY_MPI_LIBS}" already_present) + if(already_present EQUAL -1) + message(DEBUG "Adding to CMAKE_HIP_STANDARD_LIBRARIES: ${CRAY_MPI_LIBS}") + set(CMAKE_HIP_STANDARD_LIBRARIES "${CMAKE_HIP_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + else() + message(DEBUG "HIP libraries already contain MPI libs, skipping") + endif() endif() - set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" - CACHE STRING "" FORCE) + string(FIND "${CMAKE_CXX_STANDARD_LIBRARIES}" "${CRAY_MPI_LIBS}" already_present) + if(already_present EQUAL -1) + message(DEBUG "Adding to CMAKE_CXX_STANDARD_LIBRARIES: ${CRAY_MPI_LIBS}") + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES} ${CRAY_MPI_LIBS}" + CACHE STRING "" FORCE) + else() + message(DEBUG "CXX libraries already contain MPI libs, skipping") + endif() endif() else() message(DEBUG "Fix 4 not needed (GPU+MPI not enabled or GPU support not enabled)") @@ -867,8 +984,6 @@ if(FIX1_ACTIVE OR FIX2_ACTIVE OR FIX3_ACTIVE OR FIX4_ACTIVE OR FIX56_ACTIVE OR F message(DEBUG "") endif() -# At the end of CrayDetection.cmake, after all fixes: - # ============================================================================== # Generate Concise Config File # ============================================================================== @@ -879,13 +994,19 @@ file(WRITE ${CRAY_CONFIG_FILE} "# ============================================================================== # Auto-detected Cray Configuration # Generated: ${CMAKE_CURRENT_LIST_FILE} -# Date: 2025-11-12 +# Date: ${CMAKE_TIMESTAMP} # ============================================================================== # This file shows the settings auto-detected by CrayDetection.cmake # You can use this as a starting point for a manual config file. # -# To use manually: -# cmake -C cray_detected_config.cmake .. 
+# To use this config manually: +# +# From build directory: +# cmake -C ${CRAY_CONFIG_FILE} ${CMAKE_SOURCE_DIR} +# +# From source directory: +# cmake -C ${CRAY_CONFIG_FILE} -B ${CMAKE_BINARY_DIR} +# # ============================================================================== ") @@ -991,18 +1112,60 @@ message(STATUS "Generated config: ${CRAY_CONFIG_FILE}") # Add a target to display it add_custom_target(show-cray-config + COMMAND ${CMAKE_COMMAND} -E echo "Displaying auto-detected Cray configuration" COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" COMMAND ${CMAKE_COMMAND} -E echo "Auto-detected Cray Configuration:" COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" COMMAND ${CMAKE_COMMAND} -E cat ${CRAY_CONFIG_FILE} COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "To use this config manually:" - COMMAND ${CMAKE_COMMAND} -E echo " cmake -C ${CRAY_CONFIG_FILE} .." 
+ COMMAND ${CMAKE_COMMAND} -E echo " cmake -C ${CRAY_CONFIG_FILE} ${CMAKE_SOURCE_DIR}" COMMAND ${CMAKE_COMMAND} -E echo "===================================================================" - COMMENT "Displaying auto-detected Cray configuration" - VERBATIM + DEPENDS ${CRAY_CONFIG_FILE} + COMMENT "Showing Cray configuration from ${CRAY_CONFIG_FILE}" ) +# ============================================================================== +# Display Configuration Verification Results +# ============================================================================== + +if(ERF_CHECK_MODULES AND STALE_CONFIG_LOG) + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "STALE CONFIGURATION DETECTED") + message(STATUS "====================================================================") + message(STATUS "") + message(STATUS "Configuration issues found:") + foreach(issue ${STALE_CONFIG_LOG}) + message(STATUS " ${issue}") + endforeach() + message(STATUS "") + message(STATUS "This usually happens when:") + message(STATUS " - You changed which modules are loaded") + message(STATUS " - You switched compiler versions") + message(STATUS " - CMake cache contains old settings") + message(STATUS "") + message(STATUS "To resolve, clean your build:") + message(STATUS "") + message(STATUS " Recommended:") + message(STATUS " cmake --build . 
--target distclean") + message(STATUS "") + message(STATUS " Or manually:") + message(STATUS " rm -rf CMakeCache.txt CMakeFiles/ cray_detected_config.cmake") + message(STATUS "") + message(STATUS " Then reconfigure:") + message(STATUS " cmake ..") + message(STATUS "") + message(STATUS "To disable this check:") + message(STATUS " cmake -DERF_CHECK_MODULES=OFF ..") + message(STATUS "") + message(STATUS "====================================================================") + message(STATUS "") + + # Make it a hard error instead of warning + message(FATAL_ERROR "Stale configuration detected - clean build required") +endif() + message(DEBUG "=====================================================================") message(DEBUG "To disable auto-fixes: -DERF_DISABLE_CRAY_AUTO_FIXES=ON") message(DEBUG "For verbose output: cmake --log-level=VERBOSE ..") diff --git a/CMake/UtilityTargets.cmake b/CMake/UtilityTargets.cmake index 9fc820d5f1..2876393b2e 100644 --- a/CMake/UtilityTargets.cmake +++ b/CMake/UtilityTargets.cmake @@ -17,66 +17,61 @@ add_custom_target(distclean COMMAND ${CMAKE_COMMAND} -E echo "==================================================================================" COMMAND ${CMAKE_COMMAND} -E echo "Distclean: ${CMAKE_BINARY_DIR}" COMMAND ${CMAKE_COMMAND} -E echo "==================================================================================" - - # CMake configuration files (generated by cmake during configuration) - COMMAND ${CMAKE_COMMAND} -E remove -f - CMakeCache.txt # Main CMake cache - cmake_install.cmake # Install script - cmake_uninstall.cmake # Generated uninstall script - Makefile # Generated Makefile (if using Make generator) - install_manifest.txt # List of installed files - - # CPack files (generated by CPack for packaging) + + # CMake configuration files + COMMAND ${CMAKE_COMMAND} -E remove -f + ${CMAKE_BINARY_DIR}/CMakeCache.txt + ${CMAKE_BINARY_DIR}/cmake_install.cmake + ${CMAKE_BINARY_DIR}/cmake_uninstall.cmake + 
${CMAKE_BINARY_DIR}/Makefile + ${CMAKE_BINARY_DIR}/install_manifest.txt + ${CMAKE_BINARY_DIR}/cray_detected_config.cmake + + # CPack files COMMAND ${CMAKE_COMMAND} -E remove -f - CPackConfig.cmake # CPack configuration - CPackSourceConfig.cmake # CPack source package configuration - - # CTest files (generated by enable_testing() or ctest) + ${CMAKE_BINARY_DIR}/CPackConfig.cmake + ${CMAKE_BINARY_DIR}/CPackSourceConfig.cmake + + # CTest files COMMAND ${CMAKE_COMMAND} -E remove -f - CTestTestfile.cmake # CTest configuration - DartConfiguration.tcl # CDash/Dart configuration - + ${CMAKE_BINARY_DIR}/CTestTestfile.cmake + ${CMAKE_BINARY_DIR}/DartConfiguration.tcl + # Project-specific generated files COMMAND ${CMAKE_COMMAND} -E remove -f - ERFConfig.cmake # Generated by configure_file() or export() - compile_commands.json # Generated by CMAKE_EXPORT_COMPILE_COMMANDS - git-state.txt # Custom git state tracking - + ${CMAKE_BINARY_DIR}/ERFConfig.cmake + ${CMAKE_BINARY_DIR}/compile_commands.json + ${CMAKE_BINARY_DIR}/git-state.txt + # CMake-generated directories - COMMAND ${CMAKE_COMMAND} -E remove_directory - CMakeFiles # CMake build system files - Testing # CTest output - _deps # FetchContent dependencies - - # Build output directories (project-specific) - COMMAND ${CMAKE_COMMAND} -E remove_directory - Exec # Built executables - Submodules # Built submodule artifacts - Tests # Built test executables - bin # Binary output directory - erf_srclib # ERF source library output - cmake_packages # CMake package configs - externals # External library builds - - # pkg-config files (generated by configure_file() for *.pc.in) - COMMAND ${CMAKE_COMMAND} -E echo "Removing pkg-config files..." - COMMAND find . -maxdepth 1 -name "*.pc" -type f -delete 2>/dev/null || true - - # Built libraries (linker outputs) - COMMAND ${CMAKE_COMMAND} -E echo "Removing built libraries..." - COMMAND find . -maxdepth 1 -name "lib*.a" -type f -delete 2>/dev/null || true - COMMAND find . 
-maxdepth 1 -name "lib*.so" -type f -delete 2>/dev/null || true - - # Build logs (custom logging from build scripts) - COMMAND ${CMAKE_COMMAND} -E echo "Removing build logs..." - COMMAND find . -maxdepth 1 -name "build_*.log" -type f -delete 2>/dev/null || true - + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/CMakeFiles + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/Testing + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/_deps + + # Build output directories + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/Exec + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/Submodules + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/Tests + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/erf_srclib + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/cmake_packages + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/externals + + # Use shell commands with explicit directory (if needed for glob patterns) + COMMAND ${CMAKE_COMMAND} -E echo "Removing generated files..." + COMMAND sh -c "cd ${CMAKE_BINARY_DIR} && rm -f *.pc lib*.a lib*.so build_*.log 2>/dev/null || true" + # Summary COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo " DONE: Distclean complete" COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Next steps to reconfigure:" + COMMAND ${CMAKE_COMMAND} -E echo " From build directory: cmake ${CMAKE_SOURCE_DIR}" + COMMAND ${CMAKE_COMMAND} -E echo " Or more simply: cmake .." 
+ COMMAND ${CMAKE_COMMAND} -E echo " From source directory: cmake -B ${CMAKE_BINARY_DIR}" + COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Note: Install directories preserved" - + COMMENT "Removing all CMake configuration and build artifacts" WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) \ No newline at end of file From 7eac3491a7d3ac296abe8b61f6c47f7eea86de33 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 14 Nov 2025 08:16:15 -0800 Subject: [PATCH 43/44] Add better reconfigure catching --- CMake/CrayDetection.cmake | 49 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/CMake/CrayDetection.cmake b/CMake/CrayDetection.cmake index ae99899ec1..e374b6765a 100644 --- a/CMake/CrayDetection.cmake +++ b/CMake/CrayDetection.cmake @@ -74,12 +74,19 @@ option(ERF_CHECK_MODULES "Check for stale configuration from module changes" ON) if(ERF_CHECK_MODULES) list(APPEND CMAKE_MESSAGE_CONTEXT "CrayConfigCheck") - + message(DEBUG "Starting configuration verification") - + # Detection log for issues set(STALE_CONFIG_LOG "") - + + # Determine if this is first configure (before we cache anything) + set(IS_FIRST_CONFIGURE FALSE) + if(NOT DEFINED CACHED_LOADED_MODULES) + set(IS_FIRST_CONFIGURE TRUE) + message(DEBUG "First configure detected") + endif() + # Check 1: Module environment changed if(DEFINED ENV{LOADEDMODULES}) set(CURRENT_MODULES "$ENV{LOADEDMODULES}") @@ -92,12 +99,10 @@ if(ERF_CHECK_MODULES) else() message(DEBUG "Module environment unchanged") endif() - else() - message(DEBUG "First configure - caching module environment") endif() set(CACHED_LOADED_MODULES "${CURRENT_MODULES}" CACHE INTERNAL "Modules at configure time") endif() - + # Check 2: PE_ENV changed if(DEFINED ENV{PE_ENV}) set(CURRENT_PE_ENV "$ENV{PE_ENV}") @@ -107,7 +112,7 @@ if(ERF_CHECK_MODULES) endif() set(CACHED_PE_ENV "${CURRENT_PE_ENV}" CACHE INTERNAL "") endif() - + # Check 3: Compiler version changed if(DEFINED 
CMAKE_CXX_COMPILER_VERSION) if(DEFINED CACHED_CXX_COMPILER_VERSION AND NOT "${CMAKE_CXX_COMPILER_VERSION}" STREQUAL "${CACHED_CXX_COMPILER_VERSION}") @@ -116,22 +121,26 @@ if(ERF_CHECK_MODULES) endif() set(CACHED_CXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}" CACHE INTERNAL "") endif() - + # Check 4: CMAKE_*_STANDARD_LIBRARIES already contains MPI (from previous run) - if(DEFINED CMAKE_CXX_STANDARD_LIBRARIES AND CMAKE_CXX_STANDARD_LIBRARIES) - if(CMAKE_CXX_STANDARD_LIBRARIES MATCHES "mpi_") - message(VERBOSE "CMAKE_CXX_STANDARD_LIBRARIES already contains MPI libraries") - list(APPEND STALE_CONFIG_LOG "CMAKE_CXX_STANDARD_LIBRARIES pre-populated with MPI libs") - list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CXX_STANDARD_LIBRARIES}") + if(NOT IS_FIRST_CONFIGURE) # Only check on reconfigure + if(DEFINED CMAKE_CXX_STANDARD_LIBRARIES AND CMAKE_CXX_STANDARD_LIBRARIES) + if(CMAKE_CXX_STANDARD_LIBRARIES MATCHES "mpi_") + message(VERBOSE "CMAKE_CXX_STANDARD_LIBRARIES already contains MPI libraries") + list(APPEND STALE_CONFIG_LOG "CMAKE_CXX_STANDARD_LIBRARIES pre-populated with MPI libs") + list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CXX_STANDARD_LIBRARIES}") + endif() endif() - endif() - - if(DEFINED CMAKE_CUDA_STANDARD_LIBRARIES AND CMAKE_CUDA_STANDARD_LIBRARIES) - if(CMAKE_CUDA_STANDARD_LIBRARIES MATCHES "mpi_") - message(VERBOSE "CMAKE_CUDA_STANDARD_LIBRARIES already contains MPI libraries") - list(APPEND STALE_CONFIG_LOG "CMAKE_CUDA_STANDARD_LIBRARIES pre-populated with MPI libs") - list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + + if(DEFINED CMAKE_CUDA_STANDARD_LIBRARIES AND CMAKE_CUDA_STANDARD_LIBRARIES) + if(CMAKE_CUDA_STANDARD_LIBRARIES MATCHES "mpi_") + message(VERBOSE "CMAKE_CUDA_STANDARD_LIBRARIES already contains MPI libraries") + list(APPEND STALE_CONFIG_LOG "CMAKE_CUDA_STANDARD_LIBRARIES pre-populated with MPI libs") + list(APPEND STALE_CONFIG_LOG " Found: ${CMAKE_CUDA_STANDARD_LIBRARIES}") + endif() endif() + else() + 
message(DEBUG "First configure - skipping pre-populated library check") endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) From 3c7d1d01f80e9f27853b17cc13c458a740ef7131 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Fri, 14 Nov 2025 12:09:43 -0800 Subject: [PATCH 44/44] Add build_with_shoc --- Build/build_erf_with_shoc.sh | 104 +++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 Build/build_erf_with_shoc.sh diff --git a/Build/build_erf_with_shoc.sh b/Build/build_erf_with_shoc.sh new file mode 100644 index 0000000000..d212f40033 --- /dev/null +++ b/Build/build_erf_with_shoc.sh @@ -0,0 +1,104 @@ +#!/bin/bash +set -e +set -o pipefail + +# Function to verify if a directory is the ERF repo root +verify_erf_dir() { + local dir=$1 + + # Check for basic structure + if [ ! -f "$dir/CMakeLists.txt" ] || [ ! -d "$dir/Source" ]; then + return 1 + fi + + # Check for "Energy Research and Forecasting" in key files + local found=0 + + if [ -f "$dir/README.rst" ]; then + if grep -q "Energy Research and Forecasting" "$dir/README.rst" 2>/dev/null; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/LICENSE.md" ]; then + if grep -q "Energy Research and Forecasting" "$dir/LICENSE.md" 2>/dev/null; then + found=1 + fi + fi + + if [ $found -eq 0 ] && [ -f "$dir/CITATION.cff" ]; then + if grep -q "Energy Research and Forecasting" "$dir/CITATION.cff" 2>/dev/null; then + found=1 + fi + fi + + return $((1 - found)) +} + +# Function to find ERF repo root with multiple fallbacks +find_erf_dir() { + # Method 1: Use git to find repo root + if command -v git &> /dev/null; then + if git rev-parse --is-inside-work-tree &> /dev/null 2>&1; then + local git_root="$(git rev-parse --show-toplevel)" + if verify_erf_dir "$git_root"; then + ERF_DIR="$git_root" + echo "Detected ERF_DIR from git: $ERF_DIR" + return 0 + fi + fi + fi + + # Method 2: Try going up from script location + local script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + # Script 
is in Build/, so go up 1 level + local candidate="$(cd "$script_dir/.." && pwd)" + if verify_erf_dir "$candidate"; then + ERF_DIR="$candidate" + echo "Detected ERF_DIR from script location: $ERF_DIR" + return 0 + fi + + # Method 3: Check current directory + if verify_erf_dir "$PWD"; then + ERF_DIR="$PWD" + echo "Detected ERF_DIR from current directory: $ERF_DIR" + return 0 + fi + + echo "Error: Could not auto-detect ERF_DIR" + echo "Verification requires:" + echo " - CMakeLists.txt and Source/ directory" + echo " - 'Energy Research and Forecasting' in README.rst, LICENSE.md, or CITATION.cff" + return 1 +} + +################################################################################### + +# 1. Resolve ERF_DIR +# (tries git root, then script location, then current directory) +if ! find_erf_dir; then + exit 1 +fi + +export ERF_DIR +# 2. Fetch external/E3SM via eamxx_clone.sh if it is missing +E3SM_DIR="$ERF_DIR/external/E3SM" +if [ ! -d "$E3SM_DIR" ]; then + echo "external/E3SM folder not found, running eamxx_clone.sh..." + source "$ERF_DIR/Build/GNU_Ekat/eamxx_clone.sh" +else + echo "external/E3SM folder already exists, skipping clone." +fi + +# 3. Prepare build directory +echo "Preparing build directory..." +mkdir -p "$ERF_DIR/build" +cp "$ERF_DIR/Build/cmake_with_shoc.sh" "$ERF_DIR/build/" + +# 4. Move into build directory +cd "$ERF_DIR/build" + +# 5. Run cmake setup +echo "Running cmake_with_shoc.sh..." +source cmake_with_shoc.sh