alan-turing-institute
diff --git a/‎.gitignore
Lines changed: 5 additions & 0 deletions b/‎.gitignore
Lines changed: 5 additions & 0 deletions
diff --git a/‎.isort.cfg
Lines changed: 2 additions & 0 deletions b/‎.isort.cfg
Lines changed: 2 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 24 additions & 0 deletions b/‎.pre-commit-config.yaml
Lines changed: 24 additions & 0 deletions
diff --git a/‎baskerville/era5-prediction/cdsapi.config.example
Lines changed: 1 addition & 1 deletion b/‎baskerville/era5-prediction/cdsapi.config.example
Lines changed: 1 addition & 1 deletion
diff --git a/‎dawn/README.md
Lines changed: 14 additions & 0 deletions b/‎dawn/README.md
Lines changed: 14 additions & 0 deletions
diff --git a/‎dawn/batch/fine_tune.sh
Lines changed: 21 additions & 0 deletions b/‎dawn/batch/fine_tune.sh
Lines changed: 21 additions & 0 deletions
diff --git a/‎dawn/environments/requirements.txt
Lines changed: 193 additions & 0 deletions b/‎dawn/environments/requirements.txt
Lines changed: 193 additions & 0 deletions
diff --git a/‎dawn/scripts/aurora_loss.py
Lines changed: 43 additions & 0 deletions b/‎dawn/scripts/aurora_loss.py
Lines changed: 43 additions & 0 deletions
@@ -135,6 +135,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+venv_*/
 
 # Spyder project settings
 .spyderproject
@@ -175,3 +176,7 @@ cython_debug/
 
 # Aurora specific things
 cdsapi.config
+era5
+
+# slurm outputs
+slurm-*.out
@@ -0,0 +1,2 @@
+[settings]
+profile=black
@@ -0,0 +1,24 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: mixed-line-ending
+  - repo: https://github.com/psf/black
+    rev: 25.1.0
+    hooks:
+      - id: black
+        exclude: baskerville
+  - repo: https://github.com/pycqa/isort
+    rev: 6.0.1
+    hooks:
+      - id: isort
+        name: isort (python)
+        exclude: baskerville
@@ -1,2 +1,2 @@
 url: https://cds.climate.copernicus.eu/api
-key: 
+key:
@@ -0,0 +1,14 @@
+# Running Aurora on Dawn
+
+## Set up (from scratch)
+
+1. Create venv with python=3.11
+2. Activate venv
+3. Install Aurora (`pip install microsoft-aurora`)
+4. Install the Intel Extension for PyTorch as per the [docs](https://pytorch-extension.intel.com/installation)
+
+## Set up (quick)
+
+1. Create venv with python=3.11
+2. Activate venv
+3. Install the pinned requirements (`pip install -r environments/requirements.txt`)
@@ -0,0 +1,21 @@
+#!/bin/bash -l
+#SBATCH --job-name=fine-tuning
+#SBATCH --account=airr-p8-rcpp-dawn-gpu
+#SBATCH --partition=pvc9 # Dawn PVC partition
+#SBATCH -n 1   # Number of tasks (usually number of MPI ranks)
+#SBATCH -c 96  # Number of cores per task
+#SBATCH --gres=gpu:4 # Number of requested GPUs per node
+
+set -o xtrace
+set -o errexit
+
+module purge
+module load default-dawn
+
+source ../environments/venv_3_11_9/bin/activate
+
+export ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE
+
+cd ../scripts/
+
+ipython fine_tune.py
@@ -0,0 +1,193 @@
+# Note that Intel's patched versions of torch and intel-extension-for-pytorch
+# need to be installed from Intel's package repo, after Aurora is installed.
+annotated-types==0.7.0
+anyio==4.9.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==3.0.0
+async-lru==2.0.5
+attrs==25.3.0
+azure-core==1.34.0
+azure-storage-blob==12.25.1
+babel==2.17.0
+beautifulsoup4==4.13.4
+black==25.1.0
+bleach==6.2.0
+cdsapi==0.7.5
+certifi==2025.4.26
+cffi==1.17.1
+cfgv==3.4.0
+cftime==1.6.4.post1
+charset-normalizer==3.4.2
+click==8.2.1
+comm==0.2.2
+contourpy==1.3.2
+cryptography==44.0.3
+cycler==0.12.1
+datapi==0.4.0
+debugpy==1.8.14
+decorator==5.2.1
+defusedxml==0.7.1
+distlib==0.3.9
+dpcpp-cpp-rt==2025.0.4
+einops==0.8.1
+executing==2.2.0
+fastjsonschema==2.21.1
+filelock==3.13.1
+fonttools==4.58.0
+fqdn==1.5.1
+fsspec==2024.6.1
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.30.2
+identify==2.6.10
+idna==3.10
+impi-devel==2021.14.1
+impi-rt==2021.14.1
+intel-cmplr-lib-rt==2025.0.4
+intel-cmplr-lib-ur==2025.0.4
+intel-cmplr-lic-rt==2025.0.4
+intel-opencl-rt==2025.0.4
+intel-openmp==2025.0.4
+intel-pti==0.10.1
+intel-sycl-rt==2025.0.4
+intel_extension_for_pytorch==2.7.10+xpu
+ipykernel==6.29.5
+ipython==9.2.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+isodate==0.7.2
+isoduration==20.11.0
+isort==6.0.1
+jedi==0.19.2
+Jinja2==3.1.4
+json5==0.12.0
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2025.4.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.12.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+jupyter_server==2.16.0
+jupyter_server_terminals==0.5.3
+jupyterlab==4.4.2
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.8
+MarkupSafe==2.1.5
+matplotlib==3.10.3
+matplotlib-inline==0.1.7
+-e git+ssh://git@github.com/alan-turing-institute/aurora.git@3b1b9934dfe4c310bd40d83ea787b3a1694d2478#egg=microsoft_aurora
+mistune==3.1.3
+mkl==2025.0.1
+mkl-dpcpp==2025.0.1
+mpmath==1.3.0
+multiurl==0.3.5
+mypy_extensions==1.1.0
+nbclient==0.10.2
+nbconvert==7.16.6
+nbformat==5.10.4
+nest-asyncio==1.6.0
+netCDF4==1.7.2
+networkx==3.3
+nodeenv==1.9.1
+notebook==7.4.2
+notebook_shim==0.2.4
+numpy==2.1.2
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
+oneccl==2021.14.1
+oneccl-bind-pt==2.7.0+xpu
+oneccl-devel==2021.14.1
+onemkl-sycl-blas==2025.0.1
+onemkl-sycl-datafitting==2025.0.1
+onemkl-sycl-dft==2025.0.1
+onemkl-sycl-lapack==2025.0.1
+onemkl-sycl-rng==2025.0.1
+onemkl-sycl-sparse==2025.0.1
+onemkl-sycl-stats==2025.0.1
+onemkl-sycl-vm==2025.0.1
+overrides==7.7.0
+packaging==25.0
+pandas==2.2.3
+pandocfilters==1.5.1
+parso==0.8.4
+pathspec==0.12.1
+pexpect==4.9.0
+pillow==11.0.0
+platformdirs==4.3.7
+pre_commit==4.2.0
+prometheus_client==0.22.0
+prompt_toolkit==3.0.51
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pydantic==2.11.4
+pydantic_core==2.33.2
+Pygments==2.19.1
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-json-logger==3.3.0
+pytorch-triton-xpu==3.3.0
+pytz==2025.2
+PyYAML==6.0.2
+pyzmq==26.4.0
+referencing==0.36.2
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.25.1
+ruamel.yaml==0.18.10
+ruamel.yaml.clib==0.2.12
+scipy==1.15.2
+Send2Trash==1.8.3
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.7
+stack-data==0.6.3
+sympy==1.13.3
+tbb==2022.1.0
+tcmlib==1.2.0
+terminado==0.18.1
+timm==0.6.13
+tinycss2==1.4.0
+torch==2.7.0+xpu
+torchaudio==2.7.0+xpu
+torchvision==0.22.0+xpu
+tornado==6.5
+tqdm==4.67.1
+traitlets==5.14.3
+triton==3.3.0
+types-python-dateutil==2.9.0.20250516
+typing-inspection==0.4.0
+typing_extensions==4.12.2
+tzdata==2025.2
+umf==0.9.1
+uri-template==1.3.0
+urllib3==2.4.0
+virtualenv==20.31.1
+wcwidth==0.2.13
+webcolors==24.11.1
+webencodings==0.5.1
+websocket-client==1.8.0
+widgetsnbextension==4.0.14
+xarray==2025.4.0
@@ -0,0 +1,43 @@
+"""Loss functions for Aurora model training."""
+
+import torch
+
+
+def mae(x_hat_t, x_t):
+    lamb = 2
+    vs_va = 9
+    surface = {
+        "2t": 3.0,
+        "msl": 1.5,
+        "10u": 0.77,
+        "10v": 0.66,
+    }
+    atmos = {
+        "z": 2.8,
+        "q": 0.78,
+        "t": 1.7,
+        "u": 0.87,
+        "v": 0.6,
+    }
+    foo = sum(
+        [
+            (v / (720 * 1440))
+            * torch.sum(
+                torch.abs(x_hat_t.surf_vars[k] - x_t.surf_vars[k][:, :, :720, :])
+            )
+            for k, v in surface.items()
+        ]
+    )
+    bar = sum(
+        [
+            (v / (720 * 1440 * 13))
+            * torch.sum(
+                torch.abs(x_hat_t.atmos_vars[k] - x_t.atmos_vars[k][:, :, :, :720, :])
+            )
+            for k, v in atmos.items()
+        ]
+    )
+
+    alpha = 0.25
+
+    return (lamb / vs_va) * ((alpha * foo) + bar)
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`url: https://cds.climate.copernicus.eu/api`
`2`		`-key:`
	`2`	`+key:`