Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions AMD_INTRODUCTION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Icon4py performance on MI300

## Quickstart

```
# Connect to Beverin (CSCS system with MI300A)
ssh beverin.cscs.ch
```

In Beverin:

```
# Enter scratch directory
cd $SCRATCH

# Clone icon4py and checkout the correct branch
git clone [email protected]:C2SM/icon4py.git
cd icon4py
git checkout amd_profiling

# Pull the correct `uenv` image. *!* NECESSARY ONLY ONCE *!*
uenv image pull build::prgenv-gnu/25.12:2333839235

# Start the uenv and mount the ROCm 7.1.0 environment. *!* This needs to be executed every time before running anything *!*
uenv start --view default prgenv-gnu/25.12:2333839235

# Install the necessary venv
bash amd_scripts/install_icon4py_venv.sh

# Source venv
source .venv/bin/activate

# Source other necessary environment variables
source amd_scripts/setup_env.sh

# Set GT4Py related environment variables
export GT4PY_UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE="1"
export GT4PY_BUILD_CACHE_LIFETIME=persistent
export GT4PY_BUILD_CACHE_DIR=amd_profiling_granule
export GT4PY_COLLECT_METRICS_LEVEL=10
export GT4PY_DYCORE_ENABLE_METRICS="1"
export GT4PY_ADD_GPU_TRACE_MARKERS="1"
export HIPFLAGS="-std=c++17 -fPIC -O3 -march=native -Wno-unused-parameter -save-temps -Rpass-analysis=kernel-resource-usage"
```
32 changes: 32 additions & 0 deletions amd_scripts/install_icon4py_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Create the icon4py virtual environment used for profiling on AMD GPUs.
#
# Steps:
#   1. cd to the root of the icon4py git checkout,
#   2. source amd_scripts/setup_env.sh for the compiler/ROCm variables,
#   3. install `uv` into <repo>/bin if it is not there yet,
#   4. `uv sync` the project with the `rocm7_0` extra,
#   5. install the Python requirements of rocprofiler-compute into the same venv.
#
# Must be run from inside the icon4py git checkout.

# Abort on the first failing command. `pipefail` makes a failed download in the
# `curl | sh` pipeline below fatal instead of being masked by the exit status
# of `sh`.
set -eo pipefail

date

# Go to the root of the icon4py repository to run the installation from there
ICON4PY_GIT_ROOT="$(git rev-parse --show-toplevel)"
cd "$ICON4PY_GIT_ROOT"

# Set necessary flags for compilation
source "$ICON4PY_GIT_ROOT/amd_scripts/setup_env.sh"

# Install uv locally
UV_INSTALL_DIR="$PWD/bin"
export PATH="$UV_INSTALL_DIR:$PATH"
if [ ! -x "$UV_INSTALL_DIR/uv" ]; then
    curl -LsSf https://astral.sh/uv/install.sh | UV_UNMANAGED_INSTALL="$UV_INSTALL_DIR" sh
else
    echo "# uv already installed at $UV_INSTALL_DIR/uv"
fi

# Install icon4py, gt4py, DaCe and other basic dependencies using uv
uv sync --extra rocm7_0 --python "$(which python3.12)"

# Activate virtual environment
source .venv/bin/activate

# Install the requirements for rocprofiler-compute so we can run the profiler from the same environment
ROCPROF_COMPUTE_REQUIREMENTS="/user-environment/linux-zen3/rocprofiler-compute-7.1.0-rjjjgkz67w66bp46jw7bvlfyduzr6vhv/libexec/rocprofiler-compute/requirements.txt"
uv pip install -r "$ROCPROF_COMPUTE_REQUIREMENTS"

echo "# install done"
date
13 changes: 13 additions & 0 deletions amd_scripts/setup_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Environment variables for building and running icon4py on AMD GPUs.
# This file is meant to be sourced (`source amd_scripts/setup_env.sh`), not
# executed, so that the exports land in the calling shell.

# Compilers: use the gcc/g++ found on PATH, also for the MPICH wrappers.
export CC="$(which gcc)"
export MPICH_CC="$(which gcc)"
export CXX="$(which g++)"
export MPICH_CXX="$(which g++)"

export HUGETLB_ELFMAP="no"
export HUGETLB_MORECORE="no"
export PYTHONOPTIMIZE="2"

# ROCm toolchain location, version and GPU target.
export HCC_AMDGPU_TARGET="gfx942"
export ROCM_HOME="/user-environment/env/default"
export HIPCC="$(which hipcc)"
export ROCM_VERSION="7.1.0"

# Prepend to the loader variables. The `${VAR:+:$VAR}` form appends the old
# value only when it is non-empty: a dangling ':' would add an empty entry,
# which the dynamic loader interprets as the current working directory.
export LD_LIBRARY_PATH="/user-environment/linux-zen3/rocprofiler-dev-7.1.0-i7wbbbgrx7jjp4o2xroyj5j263dkzplv/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export LD_PRELOAD="/user-environment/env/default/lib/libomp.so${LD_PRELOAD:+:$LD_PRELOAD}"
1 change: 1 addition & 0 deletions model/common/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ io = [
"uxarray==2024.3.0",
"xarray[complete]>=2024.3.0"
]
rocm7_0 = ['amd-cupy>=13.0'] # TODO(havogt): add gt4py[rocm7_0] once available

[project.urls]
repository = "https://github.com/C2SM/icon4py"
Expand Down
6 changes: 4 additions & 2 deletions model/common/src/icon4py/model/common/model_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def dict_values_to_list(d: dict[str, Any]) -> dict[str, list]:
def get_dace_options(
program_name: str, **backend_descriptor: Any
) -> model_backends.BackendDescriptor:
is_rocm_backend = backend_descriptor.get("device") == model_backends.DeviceType.ROCM
optimization_args = backend_descriptor.get("optimization_args", {})
optimization_hooks = optimization_args.get("optimization_hooks", {})
if program_name in [
Expand All @@ -54,8 +55,9 @@ def get_dace_options(
backend_descriptor["use_zero_origin"] = True
if program_name == "graupel_run":
optimization_args["fuse_tasklets"] = True
optimization_args["gpu_maxnreg"] = 80
optimization_args["gpu_block_size_2d"] = (64, 6)
if not is_rocm_backend:
optimization_args["gpu_maxnreg"] = 80
optimization_args["gpu_block_size_2d"] = (64, 6)
optimization_args["gpu_memory_pool"] = False
optimization_args["make_persistent"] = True
if optimization_hooks:
Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ distributed = ["icon4py-common[distributed]"]
fortran = ["icon4py-tools>=0.0.6"]
io = ["icon4py-common[io]"]
profiling = ['viztracer>=1.1.0']
rocm7_0 = ["icon4py-common[rocm7_0]"]
testing = ["icon4py-testing"]

[project.urls]
Expand Down Expand Up @@ -405,7 +406,13 @@ explicit = true
name = 'gridtools'
url = 'https://gridtools.github.io/pypi/'

# AMD's ROCm package index. With `explicit = true`, uv only consults this
# index for packages pinned to it under [tool.uv.sources] (here: amd-cupy).
# NOTE(review): the URL targets ROCm 7.0.2 while amd_scripts/setup_env.sh
# exports ROCM_VERSION="7.1.0" — confirm the wheels are compatible.
[[tool.uv.index]]
explicit = true
name = 'amd'
url = 'https://pypi.amd.com/rocm-7.0.2/simple'

[tool.uv.sources]
amd-cupy = {index = "amd"}
dace = {index = "gridtools"}
ghex = {git = "https://github.com/msimberg/GHEX.git", branch = "async-mpi"}
# gt4py = {git = "https://github.com/GridTools/gt4py", branch = "main"}
Expand Down
26 changes: 24 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading