Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ jobs:
make -j
make install
fi
echo "RMM_FLAGS=-Drmm_ROOT=$PARFLOW_DEP_DIR/rmm" >> $GITHUB_ENV
echo "RMM_FLAGS=-DRMM_ROOT=$PARFLOW_DEP_DIR/rmm" >> $GITHUB_ENV

- name: Umpire Install
env:
Expand All @@ -414,7 +414,7 @@ jobs:
cmake --build build --parallel 4
cmake --install build
fi
echo "UMPIRE_FLAGS=-Dumpire_ROOT=$PARFLOW_DEP_DIR/Umpire" >> $GITHUB_ENV
echo "UMPIRE_FLAGS=-DUMPIRE_ROOT=$PARFLOW_DEP_DIR/Umpire" >> $GITHUB_ENV

- name: Kokkos Install
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/self_hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
cmake .. -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/depend/rmm -DBUILD_TESTS=OFF
make -j
make install
echo "RMM_FLAGS=-Drmm_ROOT=$GITHUB_WORKSPACE/depend/rmm" >> $GITHUB_ENV
echo "RMM_FLAGS=-DRMM_ROOT=$GITHUB_WORKSPACE/depend/rmm" >> $GITHUB_ENV

- name: Install Umpire
if: matrix.config.memory_manager == 'umpire'
Expand All @@ -63,7 +63,7 @@ jobs:
cmake .. -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/depend/Umpire -DENABLE_CUDA=On
make -j
make install
echo "UMPIRE_FLAGS=-Dumpire_ROOT=$GITHUB_WORKSPACE/depend/Umpire" >> $GITHUB_ENV
echo "UMPIRE_FLAGS=-DUMPIRE_ROOT=$GITHUB_WORKSPACE/depend/Umpire" >> $GITHUB_ENV

- name: Configure ParFlow
run: |
Expand Down
22 changes: 11 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,27 +125,27 @@ else(PARFLOW_ACCELERATOR_BACKEND STREQUAL "none")
message(FATAL_ERROR "ERROR: Unknown backend type! PARFLOW_ACCELERATOR_BACKEND=${PARFLOW_ACCELERATOR_BACKEND} does not exist!")
endif(PARFLOW_ACCELERATOR_BACKEND STREQUAL "none")

# Include Umpire or RMM memory manager for pool allocation
# Include UMPIRE or RMM memory manager for pool allocation
if(PARFLOW_HAVE_CUDA OR PARFLOW_HAVE_KOKKOS)

if (DEFINED umpire_ROOT AND DEFINED RMM_ROOT)
message(FATAL_ERROR "ERROR: Cannot have both Umpire and RMM enabled at the same time.")
endif(DEFINED umpire_ROOT AND DEFINED RMM_ROOT)
if (DEFINED UMPIRE_ROOT AND DEFINED RMM_ROOT)
message(FATAL_ERROR "ERROR: Cannot have both UMPIRE and RMM enabled at the same time.")
endif(DEFINED UMPIRE_ROOT AND DEFINED RMM_ROOT)

# Determine memory manager and print single status message
if(DEFINED umpire_ROOT)
find_package(umpire REQUIRED)
if(DEFINED UMPIRE_ROOT)
find_package(umpire REQUIRED HINTS ${UMPIRE_ROOT})
set(PARFLOW_HAVE_UMPIRE "yes")
message(STATUS "MEMORY MANAGER: Umpire is sleceted")
message(STATUS "MEMORY MANAGER: UMPIRE is sleceted")

elseif(DEFINED rmm_ROOT)
find_package(rmm REQUIRED)
elseif(DEFINED RMM_ROOT)
find_package(rmm REQUIRED HINTS ${RMM_ROOT})
set(PARFLOW_HAVE_RMM "yes")
message(STATUS "MEMORY MANAGER: RMM is sleceted")

else(DEFINED umpire_ROOT)
else(DEFINED UMPIRE_ROOT)
message(STATUS "MEMORY MANAGER: NO memoery manager is sleceted")
endif(DEFINED umpire_ROOT)
endif(DEFINED UMPIRE_ROOT)
endif(PARFLOW_HAVE_CUDA OR PARFLOW_HAVE_KOKKOS)

#-----------------------------------------------------------------------------
Expand Down
16 changes: 8 additions & 8 deletions README-GPU.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ Building with CUDA or Kokkos may improve the performance significantly for large

## CMake

Building with GPU acceleration requires a [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) installation, and in case Kokkos is used, [Kokkos](https://github.com/kokkos/kokkos) installation. However, the performance can be further improved by using pool allocation for Unified Memory. Supported memory managers for pool allocation are: [RMM v0.10](https://github.com/rapidsai/rmm/tree/branch-0.10) and [Umpire](https://umpire.readthedocs.io/en/develop/). Note that we are in the process of updating RMM to the newest API, but that should not affect users. Note that you should use only one memory manager, you can't pick both RMM and Umpire. Performance can be improved even more with direct communication between GPUs (requires using a CUDA-Aware MPI library).
Building with GPU acceleration requires a [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) installation and, if Kokkos is used, a [Kokkos](https://github.com/kokkos/kokkos) installation. However, performance can be further improved by using pool allocation. The supported memory managers for pool allocation are [RMM](https://github.com/rapidsai/rmm) and [Umpire](https://github.com/llnl/Umpire/tree/develop). Note that you should use only one memory manager; you cannot pick both RMM and Umpire. Performance can be improved even more with direct communication between GPUs (requires using a CUDA-Aware MPI library).

GPU acceleration is activated by passing either the *PARFLOW_ACCELERATOR_BACKEND=cuda* option to CMake, i.e.,

```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=cuda
```
or *PARFLOW_ACCELERATOR_BACKEND=kokkos* and *DKokkos_ROOT=/path/to/Kokkos* i.e.,
or *PARFLOW_ACCELERATOR_BACKEND=kokkos* and *DKOKKOS_ROOT=/path/to/Kokkos* i.e.,
```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKokkos_ROOT=/path/to/Kokkos
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKOKKOS_ROOT=/path/to/Kokkos
```
where *DPARFLOW_AMPS_LAYER=mpi1* leverages GPU-based data packing and unpacking. By default, the packed data is copied to a host staging buffer which is then passed for MPI to avoid special requirements for the MPI library. Direct communication between GPUs (with [GPUDirect P2P/RDMA](https://developer.nvidia.com/gpudirect)) can be activated by specifying an environment variable *PARFLOW_USE_GPUDIRECT=1* during runtime in which case the memory copy between CPU and GPU is avoided and a GPU pointer is passed for MPI, but this requires a CUDA-Aware MPI library (support for Unified Memory is not required with the native CUDA backend because the pointers passed to the MPI library point to pinned GPU memory allocations, but is required with the Kokkos backend).

Expand All @@ -28,17 +28,17 @@ cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_
```
or
```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKokkos_ROOT=/path/to/Kokkos -DRMM_ROOT=/path/to/RMM
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKOKKOS_ROOT=/path/to/Kokkos -DRMM_ROOT=/path/to/RMM
```

Similarly, the Umpire library can be activated by specifying the Umpire root directory with -Dumpire_ROOT=/path/to/umpire/root
Similarly, the Umpire library can be activated by specifying the Umpire root directory with -DUMPIRE_ROOT=/path/to/umpire/root

```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=cuda -Dumpire_ROOT=/path/to/umpire
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=cuda -DUMPIRE_ROOT=/path/to/umpire
```
or
```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKokkos_ROOT=/path/to/Kokkos -Dumpire_ROOT=/path/to/umpire
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=kokkos -DKOKKOS_ROOT=/path/to/Kokkos -DUMPIRE_ROOT=/path/to/umpire
```

Note that on some systems, nvcc cannot locate the MPI include files by default; if this is the case, defining the environment variable CUDAHOSTCXX=mpicxx might help.
Expand All @@ -47,7 +47,7 @@ Finally, you must make sure you are building the code for the correct GPU archit


```shell
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=cuda -Dumpire_ROOT=/path/to/umpire -DCMAKE_CUDA_ARCHITECTURES=90
cmake ../parflow -DPARFLOW_AMPS_LAYER=mpi1 -DCMAKE_BUILD_TYPE=Release -DPARFLOW_ENABLE_TIMING=TRUE -DPARFLOW_HAVE_CLM=ON -DCMAKE_INSTALL_PREFIX=${PARFLOW_DIR} -DPARFLOW_ACCELERATOR_BACKEND=cuda -DUMPIRE_ROOT=/path/to/umpire -DCMAKE_CUDA_ARCHITECTURES=90
```

## Running Parflow with GPU acceleration
Expand Down
Loading
Loading