Add a yamllint target and make src/setup install into existing conda environments.

Reviewer note: in pip_install, the grep filter is wrapped in "{ ... || true; }" because grep -v
exits 1 when it suppresses every line, which would abort the script under "set -eo pipefail".

diff --git a/README.md b/README.md
index a417c97..b717b8d 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ This repository contains configuration and driver code for running an end-to-end
 
 ## Quickstart
 
-This section provides a recipe for an end-to-end run of Nested EAGLE on Ursa.
+This section provides a recipe for an end-to-end run of Nested EAGLE on Ursa. GNU `make` version 3.82 or higher is required.
 
 In the `src/` directory:
 
@@ -19,7 +19,7 @@ In the `src/` directory:
 
 This step creates the runtime software environment, comprising conda virtual environments to support data prep, training, inference, and verification. The `conda/` subdirectory it creates is self-contained and can be removed and recreated by running the `make env` command again, as long as pipeline steps are not currently running.
 
-Developers who will be modifying Python driver code should replace `make env` with `make devenv`, which will create the same environments but also install additional code-quality tools for formatting, linting, shellchecking, typechecking, and unit testing.
+Developers who will be modifying Python driver code should replace `make env` with `make devenv`, which will create the same environments but also install additional code-quality tools for formatting, linting, shellchecking, typechecking, and YAML linting.
 
 **2. Run `make config compose=base:ursa >eagle.yaml` to create the EAGLE YAML config.**
 
@@ -256,9 +256,10 @@ After successful completion, the following `make` targets will be available:
 
 ``` bash
 make format # format Python code
-make lint # run the linter on Python code
-make shellcheck # run shellcheck on Bash scripts
-make typecheck # run the typechecker on Python code
+make lint # run a linter on Python code
+make shellcheck # run a checker on Bash scripts
+make typecheck # run a typechecker on Python code
+make yamllint # run a linter on YAML configs
 make test # all of the above except formatting
 ```
 
diff --git a/src/Makefile b/src/Makefile
index 61d0fc0..839ee4f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,7 +3,7 @@ ENVS = data inference prewxvx training wxvx
 HELPERS = prewxvx vx zarr
 PACKAGE = eagle
 STEPS = data grids-and-meshes inference prewxvx-global prewxvx-lam training vx-grid-global vx-grid-lam vx-obs-global vx-obs-lam zarr-gfs zarr-hrrr
-TOOLING = config devenv env format lint realize shellcheck test typecheck validate
+TOOLING = config devenv env format lint realize shellcheck test typecheck validate yamllint
 
 activate = @source conda/etc/profile.d/conda.sh && conda activate $(1)
 check = @$(if $(1),,$(error $(2)= argument required))
@@ -98,7 +98,7 @@ shellcheck:
 	@echo "=> Checking shell scripts"
 	@(set -x && shellcheck --format=gcc --severity=info --shell=bash $(BASHSRCS))
 
-test: lint shellcheck typecheck
+test: lint shellcheck typecheck yamllint
 
 training:
 	$(call activate,training)
@@ -146,6 +146,11 @@ vx-obs-global:
 vx-obs-lam:
 	@$(make) vx truth=grid2obs extent=lam
 
+yamllint:
+	$(call activate,base)
+	@echo "=> Linting YAML configs"
+	@(set -x && yamllint --no-warnings config/)
+
 zarr:
 	$(call activate,data)
 ifeq ($(task),?)
diff --git a/src/config/base.yaml b/src/config/base.yaml
index 92e1939..5d96aba 100644
--- a/src/config/base.yaml
+++ b/src/config/base.yaml
@@ -1,5 +1,7 @@
 # This is the base EAGLE config. It currently configures the Nested EAGLE case.
 
+# yamllint disable rule:anchors rule:line-length
+
 app:
   base: /path/to/eagle/src
   experiment_name: default
diff --git a/src/config/ursa.yaml b/src/config/ursa.yaml
index e20f561..e53389d 100644
--- a/src/config/ursa.yaml
+++ b/src/config/ursa.yaml
@@ -1,5 +1,7 @@
 # Configuration for the NOAA RDHPCS Ursa platform.
 
+# yamllint disable rule:anchors rule:line-length
+
 app:
   gpu:
     batchargs: &gpu-batchargs
diff --git a/src/setup b/src/setup
index 6e7eb6d..da8e90d 100755
--- a/src/setup
+++ b/src/setup
@@ -1,18 +1,25 @@
 #!/usr/bin/env bash
 
-# shellcheck disable=1090,1091,2046,2048,2068,2086,2206,2155
+# shellcheck disable=1090,1091,2046,2048,2068,2086,2206,2207
 
-set -aeu
+set -aeuo pipefail
 
 INSTALLDIR=$PWD/conda
-MAKE="make=4.4.*"
 UWTOOLS="uwtools=2.13.*"
 
 # In all functions, make variables local unless they are either intended for global use, or set in
 # a subshell that exits at the end of the function.
 
+cachedir() {
+  (
+    conda_activate base
+    echo $CONDA_PREFIX/cache
+  )
+}
+
 conda_activate() {
-  local env=$1
+  local env
+  env=$1
   source $INSTALLDIR/etc/profile.d/conda.sh
   # Activation scripts shipped with conda packages may contain statements either expanding undefined
   # variables or exiting with error status, so temporarily disable the checks for these conditions,
@@ -23,13 +30,12 @@ conda_activate() {
 }
 
 conda_create() {
-  local name=$1
+  local name
+  name=$1
+  shift
   conda_activate base
   msg Creating environment: $name
-  # In each runtime virtual environment, "make format" requires jq to format JSON Schema files;
-  # invoking make targets requires make itself; and driver execution and config management require
-  # uwtools, so install those.
-  conda create -y -q -c ufs-community -n $* jq $MAKE $UWTOOLS
+  conda create -y -q -c ufs-community -n $name $* $UWTOOLS
   (
     # When runtime virtual environments are activated, set XDG_CACHE_HOME so that pip caches package
     # files in the runtime area, rather than in the user's home directory, which might impact their
@@ -41,7 +47,7 @@ conda_create() {
 if [[ -v XDG_CACHE_HOME ]]; then
   export EAGLE_OLD_XDG_CACHE_HOME=\$XDG_CACHE_HOME
 fi
-export XDG_CACHE_HOME=\$_CONDA_ROOT/cache
+export XDG_CACHE_HOME=$(cachedir)
 EOF
     dir_deactivate=$CONDA_PREFIX/etc/conda/deactivate.d
     mkdir -pv $dir_deactivate
@@ -56,86 +62,145 @@ EOF
 }
 
 conda_create_base() {
-  true
+  local name pkgs
+  name=base
+  pkgs=(
+    jq=1.8.*
+    shellcheck=0.11.*
+    $UWTOOLS
+    yamllint=1.38.*
+  )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  conda_install $name -c ufs-community ${pkgs[@]}
 }
 
 conda_create_data() {
-  local name=data
-  conda_env_exists $name && return || true
-  conda_create $name impi_rt=2021.13.* mpi4py=4.1.* numpy=1.26.4 pandas=2.3.3 types-pyyaml ufs2arco=0.18.*
-  (
-    conda_activate $name
-    dir_activate=$CONDA_PREFIX/etc/conda/activate.d
-    mkdir -pv $dir_activate
-    cat <<EOF >>$CONDA_PREFIX/etc/conda/activate.d/eagle.sh
+  local args name pkgs
+  name=data
+  pkgs=(
+    impi_rt=2021.13.*
+    mpi4py=4.1.*
+    numpy=1.26.4
+    pandas=2.3.3
+    types-pyyaml
+    ufs2arco=0.18.*
+  )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  args=( $name ${pkgs[@]} )
+  if conda_env_exists $name; then
+    conda_install ${args[@]}
+  else
+    conda_create ${args[@]}
+    (
+      conda_activate $name
+      dir_activate=$CONDA_PREFIX/etc/conda/activate.d
+      mkdir -pv $dir_activate
+      cat <<EOF >>$CONDA_PREFIX/etc/conda/activate.d/eagle.sh
 if [[ -v FI_PSM3_UUID ]]; then
   export EAGLE_OLD_FI_PSM3_UUID=\$FI_PSM3_UUID
 fi
 export FI_PSM3_UUID=eagle
 EOF
-    dir_deactivate=$CONDA_PREFIX/etc/conda/deactivate.d
-    mkdir -pv $dir_deactivate
-    cat <<EOF >>$CONDA_PREFIX/etc/conda/deactivate.d/eagle.sh
+      dir_deactivate=$CONDA_PREFIX/etc/conda/deactivate.d
+      mkdir -pv $dir_deactivate
+      cat <<EOF >>$CONDA_PREFIX/etc/conda/deactivate.d/eagle.sh
 unset FI_PSM3_UUID
 if [[ -v EAGLE_OLD_FI_PSM3_UUID ]]; then
   export FI_PSM3_UUID=\$EAGLE_OLD_FI_PSM3_UUID
   unset EAGLE_OLD_FI_PSM3_UUID
 fi
 EOF
-  )
+    )
+  fi
   pip_install $name anemoi-datasets==0.5.* anemoi-graphs==0.6.*
 }
 
 conda_create_inference() {
-  local name=inference
-  conda_env_exists $name && return || true
-  (
-    CONDA_OVERRIDE_CUDA=$(cuda_release) conda_create $name flash-attn=2.8.* numpy=2.2.6 python=3.12
-    conda_activate $name
-    write_anemoi_activation_scripts
+  local args name pkgs
+  name=inference
+  pkgs=(
+    flash-attn=2.8.*
+    numpy=2.2.6.*
+    python=3.12
   )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  args=( $name ${pkgs[@]} )
+  if conda_env_exists $name; then
+    conda_install ${args[@]}
+  else
+    conda_create ${args[@]}
+    write_anemoi_activation_scripts $name
+  fi
   pip_install $name anemoi-inference==0.7.* anemoi-models==0.9.* eagle-tools
 }
 
 conda_create_prewxvx() {
-  local name=prewxvx
-  conda_env_exists $name && return || true
-  conda_create $name python=3.13 xesmf
+  local args name pkgs
+  name=prewxvx
+  pkgs=(
+    numpy=2.2.6.*
+    pandas=2.3.3.*
+    python=3.13
+    xesmf=0.8.*
+  )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  args=( $name ${pkgs[@]} )
+  if conda_env_exists $name; then
+    conda_install ${args[@]}
+  else
+    conda_create ${args[@]}
+  fi
   pip_install $name eagle-tools
 }
 
 conda_create_training() {
-  local name=training
-  conda_env_exists $name && return || true
-  (
-    CONDA_OVERRIDE_CUDA=$(cuda_release) conda_create $name flash-attn=2.8.* numpy=1.26.4 python=3.12
-    conda_activate $name
-    write_anemoi_activation_scripts
+  local args name pkgs
+  name=training
+  pkgs=(
+    flash-attn=2.8.*
+    numpy=1.26.4.*
+    python=3.12
   )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  args=( $name ${pkgs[@]} )
+  if conda_env_exists $name; then
+    conda_install ${args[@]}
+  else
+    conda_create ${args[@]}
+    write_anemoi_activation_scripts $name
+  fi
   pip_install $name anemoi-models==0.9.* anemoi-training==0.6.*
 }
 
 conda_create_wxvx() {
-  local name=wxvx
-  conda_env_exists $name && return || true
-  conda_create $name -c oar-gsl wxvx=0.7.*
+  local args name pkgs
+  name=wxvx
+  pkgs=(
+    wxvx=0.7.*
+  )
+  test -v EAGLE_DEV && pkgs+=( $(devpkgs) )
+  args=( $name -c oar-gsl ${pkgs[@]} )
+  if conda_env_exists $name; then
+    conda_install ${args[@]}
+  else
+    conda_create ${args[@]}
+  fi
 }
 
 conda_env_exists() {
   name=$1
   (
     conda_activate base
-    if conda env list --json | jq -r .envs_details[].name | grep -q "^$name$"; then
-      msg Found existing conda environment: $name
-      exit 0
-    fi
-    exit 1
+    conda env list --json | jq -r .envs_details[].name | grep -q "^${name}$"
+    test $? -eq 0 && msg Found existing conda environment: $name
   )
 }
 
 conda_install() {
-  local name=$1
+  local name
+  name=$1
   shift
+  msg Installing conda packages into environment: $name
   (
     conda_activate $name
     set +eu
@@ -144,13 +209,6 @@ conda_install() {
   )
 }
 
-conda_install_devpkgs() {
-  local name=$1
-  local devpkgs=( $MAKE mypy=1.19.* ruff=0.14.* )
-  msg Installing dev packages into environment: $name
-  conda_install $name "${devpkgs[@]}"
-}
-
 cuda_command_file() {
   local path val
   val=${ARGS[cudascript]}
@@ -166,19 +224,27 @@ cuda_release() {
   )
 }
 
+devpkgs() {
+  local pkgs=(
+    mypy=1.19.*
+    ruff=0.14.*
+  )
+  echo ${pkgs[@]}
+}
+
 install_conda() {
+  local installer url ver
   if [[ -d $INSTALLDIR ]]; then
     msg Found existing conda installation: $INSTALLDIR
     return
   fi
-  local ver=25.9.1-0
-  local installer=Miniforge3-$ver-$(uname -s)-$(uname -p).sh
-  local url=https://github.com/conda-forge/miniforge/releases/download/$ver/$installer
+  ver=25.9.1-0
+  installer=Miniforge3-$ver-$(uname -s)-$(uname -p).sh
+  url=https://github.com/conda-forge/miniforge/releases/download/$ver/$installer
   msg Installing conda
   wget -nv $url
   bash $installer -bfp $INSTALLDIR
   rm -v $installer
-  conda_install base -c ufs-community $UWTOOLS
 }
 
 msg() {
@@ -188,7 +254,7 @@ msg() {
 parse_kvargs() {
   local arg key required val
   declare -g -A ARGS
-  for arg in "$@"; do
+  for arg in $@; do
     IFS="=" read -r key val <<< $arg
     ARGS[$key]=$val
   done
@@ -200,15 +266,22 @@ parse_kvargs() {
 }
 
 pip_install() {
-  local name=$1
+  local name
+  name=$1
   shift
+  msg Installing pip packages into environment: $name
   (
     conda_activate $name
-    pip install "$@"
+    # grep -v exits 1 when every pip output line is filtered out; mask that status so that, under
+    # "set -eo pipefail", only a failure of pip itself aborts the run.
+    pip install $@ | { grep -v "^Requirement already satisfied: .*$" || true; }
   )
 }
 
 write_anemoi_activation_scripts() {
+  local name=$1
+  (
+    conda_activate $name
   cat <<EOF >>$CONDA_PREFIX/etc/conda/activate.d/eagle.sh
 modules="\$(module --terse list 2>/dev/null | grep -v 'No modules loaded' | tr '\n' ' ')"
 if [[ -n "\$modules" ]]; then
@@ -223,14 +296,16 @@ if [[ -v EAGLE_OLD_MODULES ]]; then
   unset EAGLE_OLD_MODULES
 fi
 EOF
+  )
 }
 
 parse_kvargs $@
 install_conda
-conda_install base shellcheck $UWTOOLS
+CONDA_OVERRIDE_CUDA=$(cuda_release)
+XDG_CACHE_HOME=$(cachedir)
+export CONDA_OVERRIDE_CUDA XDG_CACHE_HOME
 environments=( base data inference prewxvx training wxvx )
-for name in ${environments[*]}; do
+for name in ${environments[@]}; do
   conda_create_$name
-  test -v EAGLE_DEV && conda_install_devpkgs $name
 done
 msg Done