diff --git a/.github/workflows/benchmarks-last-release.yml b/.github/workflows/benchmarks-last-release.yml deleted file mode 100644 index bf3f5de480f..00000000000 --- a/.github/workflows/benchmarks-last-release.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Benchmark compare last release - -on: - push: - branches: - - main - workflow_dispatch: - -jobs: - benchmark: - name: Linux - runs-on: ubuntu-latest - env: - ASV_DIR: "./asv_bench" - CONDA_ENV_FILE: ci/requirements/environment.yml - - steps: - # We need the full repo to avoid this issue - # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Set up conda environment - uses: mamba-org/setup-micromamba@v2 - with: - micromamba-version: "1.5.10-0" - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-tests - cache-environment: true - cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" - create-args: >- - asv - - - name: "Get Previous tag" - id: previoustag - uses: "WyriHaximus/github-action-get-previous-tag@v1" - # with: - # fallback: 1.0.0 # Optional fallback tag to use when no tag can be found - - - name: Run benchmarks - shell: bash -l {0} - id: benchmark - env: - OPENBLAS_NUM_THREADS: 1 - MKL_NUM_THREADS: 1 - OMP_NUM_THREADS: 1 - ASV_FACTOR: 1.5 - ASV_SKIP_SLOW: 1 - run: | - set -x - # ID this runner - asv machine --yes - echo "Baseline: ${{ steps.previoustag.outputs.tag }} " - echo "Contender: ${{ github.sha }}" - # Use mamba for env creation - # export CONDA_EXE=$(which mamba) - export CONDA_EXE=$(which conda) - # Run benchmarks for current commit against base - ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" - asv continuous $ASV_OPTIONS ${{ steps.previoustag.outputs.tag }} ${{ github.sha }} \ - | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ - | tee benchmarks.log - # Report and export results for subsequent steps - 
if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then - exit 1 - fi - working-directory: ${{ env.ASV_DIR }} - - - name: Add instructions to artifact - if: always() - run: | - cp benchmarks/README_CI.md benchmarks.log .asv/results/ - working-directory: ${{ env.ASV_DIR }} - - - uses: actions/upload-artifact@v4 - if: always() - with: - name: asv-benchmark-results-${{ runner.os }} - path: ${{ env.ASV_DIR }}/.asv/results diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml deleted file mode 100644 index 113e8184f56..00000000000 --- a/.github/workflows/benchmarks.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Benchmark - -on: - pull_request: - types: [opened, reopened, synchronize, labeled] - workflow_dispatch: - -env: - PR_HEAD_LABEL: ${{ github.event.pull_request.head.label }} - -jobs: - benchmark: - if: ${{ contains( github.event.pull_request.labels.*.name, 'run-benchmark') && github.event_name == 'pull_request' || contains( github.event.pull_request.labels.*.name, 'topic-performance') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} - name: Linux - runs-on: ubuntu-latest - env: - ASV_DIR: "./asv_bench" - CONDA_ENV_FILE: ci/requirements/environment-benchmark.yml - - steps: - # We need the full repo to avoid this issue - # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Set up conda environment - uses: mamba-org/setup-micromamba@v2 - with: - micromamba-version: "1.5.10-0" - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-benchmark - cache-environment: true - cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" - # add "build" because of https://github.com/airspeed-velocity/asv/issues/1385 - create-args: >- - asv - python-build - mamba<=1.5.10 - - - name: Run benchmarks - shell: bash -l {0} - id: benchmark 
- env: - OPENBLAS_NUM_THREADS: 1 - MKL_NUM_THREADS: 1 - OMP_NUM_THREADS: 1 - ASV_FACTOR: 1.5 - ASV_SKIP_SLOW: 1 - run: | - set -x - # ID this runner - asv machine --yes - echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})" - echo "Contender: ${GITHUB_SHA} ($PR_HEAD_LABEL)" - # Run benchmarks for current commit against base - ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" - asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \ - | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ - | tee benchmarks.log - # Report and export results for subsequent steps - if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then - exit 1 - fi - working-directory: ${{ env.ASV_DIR }} - - - name: Add instructions to artifact - if: always() - run: | - cp benchmarks/README_CI.md benchmarks.log .asv/results/ - working-directory: ${{ env.ASV_DIR }} - - - uses: actions/upload-artifact@v4 - if: always() - with: - name: asv-benchmark-results-${{ runner.os }} - path: ${{ env.ASV_DIR }}/.asv/results diff --git a/.github/workflows/benchmarks_report.yml b/.github/workflows/benchmarks_report.yml new file mode 100644 index 00000000000..bdaf76e0391 --- /dev/null +++ b/.github/workflows/benchmarks_report.yml @@ -0,0 +1,83 @@ +# Post any reports generated by benchmarks_run.yml . 
+# Separated for security: +# https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + +name: benchmarks-report +run-name: Report benchmark results + +on: + workflow_run: + workflows: [benchmarks-run] + types: + - completed + +jobs: + download: + runs-on: ubuntu-latest + outputs: + reports_exist: ${{ steps.unzip.outputs.reports_exist }} + steps: + - name: Download artifact + id: download-artifact + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#using-data-from-the-triggering-workflow + uses: actions/github-script@v8 + with: + script: | + let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => { + return artifact.name == "benchmark_reports" + })[0]; + if (typeof matchArtifact != 'undefined') { + let download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + let fs = require('fs'); + fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/benchmark_reports.zip`, Buffer.from(download.data)); + }; + + - name: Unzip artifact + id: unzip + run: | + if test -f "benchmark_reports.zip"; then + reports_exist=1 + unzip benchmark_reports.zip -d benchmark_reports + else + reports_exist=0 + fi + echo "reports_exist=$reports_exist" >> "$GITHUB_OUTPUT" + + - name: Store artifact + uses: actions/upload-artifact@v4 + with: + name: benchmark_reports + path: benchmark_reports + + post_reports: + runs-on: ubuntu-latest + needs: download + if: needs.download.outputs.reports_exist == 1 + steps: + - name: Checkout repo + uses: actions/checkout@v5 + + - name: Download artifact + uses: actions/download-artifact@v5 + with: + name: benchmark_reports + path: .github/workflows/benchmark_reports + + - name: Set up Python + # 
benchmarks/bm_runner.py only needs builtins to run. + uses: actions/setup-python@v6 + + - name: Post reports + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: benchmarks/bm_runner.py _gh_post diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml new file mode 100644 index 00000000000..cd88559c13b --- /dev/null +++ b/.github/workflows/benchmarks_run.yml @@ -0,0 +1,175 @@ +# Use ASV to check for performance regressions, either: +# - In the last 24 hours' commits. +# - Introduced by this pull request. + +name: benchmarks-run +run-name: Run benchmarks + +on: + schedule: + # Runs every day at 23:00. + - cron: "0 23 * * *" + workflow_dispatch: + inputs: + first_commit: + description: "First commit to benchmark (see bm_runner.py > Overnight)." + required: false + type: string + pull_request: + # Add the `labeled` type to the default list. + types: [labeled, opened, synchronize, reopened] + +jobs: + pre-checks: + # This workflow supports two different scenarios (overnight and branch). + # The pre-checks job determines which scenario is being run. + runs-on: ubuntu-latest + if: github.repository == 'pydata/xarray' + outputs: + overnight: ${{ steps.overnight.outputs.check }} + branch: ${{ steps.branch.outputs.check }} + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 2 + - id: files-changed + uses: marceloprado/has-changed-path@df1b7a3161b8fb9fd8c90403c66a9e66dfde50cb + with: + # SEE ALSO .github/labeler.yml . 
+ paths: ci/requirements/locks/*.lock + - id: overnight + name: Check overnight scenario + if: github.event_name != 'pull_request' + run: echo "check=true" >> "$GITHUB_OUTPUT" + - id: branch + name: Check branch scenario + if: > + github.event_name == 'pull_request' + && + ( + steps.files-changed.outputs.changed == 'true' + || + github.event.label.name == 'benchmark_this' + || + github.event.label.name == 'run-benchmarks' + || + github.event.label.name == 'topic-performance' + ) + run: echo "check=true" >> "$GITHUB_OUTPUT" + + + benchmark: + runs-on: ubuntu-latest + needs: pre-checks + if: > + needs.pre-checks.outputs.overnight == 'true' || + needs.pre-checks.outputs.branch == 'true' + + env: + IRIS_TEST_DATA_LOC_PATH: benchmarks + IRIS_TEST_DATA_PATH: benchmarks/iris-test-data + IRIS_TEST_DATA_VERSION: "2.28" + # Lets us manually bump the cache to rebuild + ENV_CACHE_BUILD: "0" + TEST_DATA_CACHE_BUILD: "2" + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - name: Checkout repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Install run dependencies + run: pip install asv nox!=2025.05.01 + + - name: Cache environment directories + id: cache-env-dir + uses: actions/cache@v4 + with: + path: | + .nox + benchmarks/.asv/env + $CONDA/pkgs + key: ${{ runner.os }}-${{ hashFiles('requirements/') }}-${{ env.ENV_CACHE_BUILD }} + + - name: Cache test data directory + id: cache-test-data + uses: actions/cache@v4 + with: + path: | + ${{ env.IRIS_TEST_DATA_PATH }} + key: + test-data-${{ env.IRIS_TEST_DATA_VERSION }}-${{ env.TEST_DATA_CACHE_BUILD }} + + - name: Fetch the test data + if: steps.cache-test-data.outputs.cache-hit != 'true' + run: | + wget --quiet https://github.com/SciTools/iris-test-data/archive/v${IRIS_TEST_DATA_VERSION}.zip -O iris-test-data.zip + unzip -q iris-test-data.zip + mkdir --parents ${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_LOC_PATH} + mv iris-test-data-${IRIS_TEST_DATA_VERSION} 
${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH} + + - name: Set test data var + run: | + echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV + + - name: Benchmark this pull request + # If the 'branch' condition(s) are met: use the bm_runner to compare + # the proposed merge with the base branch. + if: needs.pre-checks.outputs.branch == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.number }} + run: | + nox -s benchmarks -- branch origin/${{ github.base_ref }} + + - name: Run overnight benchmarks + # If the 'overnight' condition(s) are met: use the bm_runner to compare + # each of the last 24 hours' commits to their parents. + id: overnight + if: needs.pre-checks.outputs.overnight == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # The first_commit argument allows a custom starting point - useful + # for manual re-running. + run: | + first_commit=${{ inputs.first_commit }} + if [ "$first_commit" == "" ] + then + first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1) + fi + + if [ "$first_commit" != "" ] + then + nox -s benchmarks -- overnight $first_commit + fi + + - name: Warn of failure + # The overnight run is not on a pull request, so a failure could go + # unnoticed without being actively advertised. + if: > + failure() && + steps.overnight.outcome == 'failure' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + title="Overnight benchmark workflow failed: \`${{ github.run_id }}\`" + body="Generated by GHA run [\`${{github.run_id}}\`](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})" + gh issue create --title "$title" --body "$body" --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY + + - name: Upload any benchmark reports + # Uploading enables more downstream processing e.g. posting a PR comment. 
+ if: success() || steps.overnight.outcome == 'failure' + uses: actions/upload-artifact@v4 + with: + name: benchmark_reports + path: .github/workflows/benchmark_reports + + - name: Archive asv results + # Store the raw ASV database(s) to help manual investigations. + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: asv-raw-results + path: benchmarks/.asv/results diff --git a/.github/workflows/benchmarks_validate.yml b/.github/workflows/benchmarks_validate.yml new file mode 100644 index 00000000000..e3f090b32c0 --- /dev/null +++ b/.github/workflows/benchmarks_validate.yml @@ -0,0 +1,48 @@ +name: benchmarks-validate +run-name: Validate the benchmarking setup + +on: + push: + branches: + - "main" + - "v*x" + tags: + - "v*" + pull_request: + branches: + - "*" + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + validate: + runs-on: ubuntu-latest + + env: + # Lets us manually bump the cache to rebuild + ENV_CACHE_BUILD: "0" + + steps: + - name: Checkout repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Install run dependencies + run: pip install asv nox!=2025.05.01 + + - name: Cache environment directories + id: cache-env-dir + uses: actions/cache@v4 + with: + path: | + .nox + benchmarks/.asv/env + $CONDA/pkgs + key: ${{ runner.os }}-${{ hashFiles('requirements/') }}-${{ env.ENV_CACHE_BUILD }} + + - name: Validate setup + run: nox -s benchmarks -- validate diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml new file mode 100644 index 00000000000..7aa8803426a --- /dev/null +++ b/.github/workflows/refresh-lockfiles.yml @@ -0,0 +1,111 @@ +# This workflow periodically creates new environment lock files based on the newest +# available packages and dependencies. +# +# Environment specifications are given as conda environment.yml files found in +# `requirements/py**.yml`. 
These state the packages required, the conda channels +# that the packages will be pulled from, and any versions of packages that need to be +# pinned at specific versions. +# +# For environments that have changed, a pull request will be made and submitted +# to the main branch + +name: Refresh Lockfiles + + +on: + pull_request: + branches: + - "*" + workflow_call: + +jobs: + get_python_matrix: + # Determines which Python versions should be included in the matrix used in + # the gen_lockfiles job. + if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.get_py.outputs.matrix }} + steps: + - uses: actions/checkout@v5 + - id: get_py + run: echo "MATRIX=$(ls -1 ci/requirements/environment-benchmark.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} + + gen_lockfiles: + # This is a matrix job: it splits to create new lockfiles for each + # of the CI test python versions. + if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + needs: get_python_matrix + + strategy: + matrix: + python: ${{ fromJSON(needs.get_python_matrix.outputs.MATRIX) }} + + steps: + - uses: actions/checkout@v5 + - name: install requirements + run: | + source $CONDA/bin/activate base + conda update -n base --all + - name: generate lockfile + run: | + pipx run conda-lock -k explicit -p linux-64 -f ci/requirements/${{matrix.python}}.yml + mv conda-linux-64.lock ${{matrix.python}}-linux-64.lock + - name: output lockfile + uses: actions/upload-artifact@v4 + with: + name: lock-artifacts-${{matrix.python}} + path: ${{matrix.python}}-linux-64.lock + + create_pr: + # Once the matrix job has completed all the lock files will have been + # uploaded as artifacts. + # Download the artifacts, add them to the repo, and create a PR. 
+ if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + needs: gen_lockfiles + + steps: + - uses: actions/checkout@v5 + - name: get artifacts + uses: actions/download-artifact@v5 + with: + path: ${{ github.workspace }}/ci/requirements/locks + merge-multiple: true + + - name: "Generate token" + uses: actions/create-github-app-token@v2 + id: generate-token + with: + app-id: ${{ secrets.AUTH_APP_ID }} + private-key: ${{ secrets.AUTH_APP_PRIVATE_KEY }} + + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e + with: + token: ${{ steps.generate-token.outputs.token }} + commit-message: Updated environment lockfiles + committer: "Lockfile bot " + author: "Lockfile bot " + delete-branch: true + branch: auto-update-lockfiles + title: "[CI Bot] environment lockfiles auto-update" + body: | + Lockfiles updated to the latest resolvable environment. + ### If the CI tasks fail, create a new branch based on this PR and add the required fixes to that branch. 
+ labels: | + New: Pull Request + Bot + + - name: Check Pull Request + if: steps.cpr.outputs.pull-request-number != '' + run: | + echo "### :rocket: Pull-Request Summary" >> ${GITHUB_STEP_SUMMARY} + echo "" >> ${GITHUB_STEP_SUMMARY} + echo "The following lock-files pull-request has been auto-generated:" >> ${GITHUB_STEP_SUMMARY} + echo "- **PR** #${{ steps.cpr.outputs.pull-request-number }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **URL** ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **Operation** [${{ steps.cpr.outputs.pull-request-operation }}]" >> ${GITHUB_STEP_SUMMARY} + echo "- **SHA** ${{ steps.cpr.outputs.pull-request-head-sha }}" >> ${GITHUB_STEP_SUMMARY} diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000000..09ea920176f --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,175 @@ +# SciTools Performance Benchmarking + +SciTools uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv) +(ASV) setup to benchmark performance. This is primarily designed to check for +performance shifts between commits using statistical analysis, but can also +be easily repurposed for manual comparative and scalability analyses. + +The benchmarks are automatically run overnight +[by a GitHub Action](../.github/workflows/benchmarks_run.yml), with any notable +shifts in performance being flagged in a new GitHub issue. + +## Running benchmarks + +On GitHub: a Pull Request can be benchmarked by adding the +https://github.com/SciTools/iris/labels/benchmark_this +label to the PR (to run a second time: just remove and re-add the label). +Note that a benchmark run could take an hour or more to complete. +This runs a comparison between the PR branch's ``HEAD`` and its merge-base with +the PR's base branch, thus showing performance differences introduced +by the PR. (This run is managed by +[the aforementioned GitHub Action](../.github/workflows/benchmarks_run.yml)).
+ +To run locally: the **benchmark runner** provides conveniences for +common benchmark setup and run tasks, including replicating the benchmarking +performed by GitHub Actions workflows. This can be accessed by: + +- The Nox `benchmarks` session - (use + `nox -s benchmarks -- --help` for details). +- `benchmarks/bm_runner.py` (use the `--help` argument for details). +- Directly running `asv` commands from the `benchmarks/` directory (check + whether environment setup has any extra dependencies - see + [Benchmark environments](#benchmark-environments)). + +### Reducing run time + +A significant portion of benchmark run time is environment management. Run-time +can be reduced by co-locating the benchmark environment and your +[Conda package cache](https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/custom-env-and-pkg-locations.html) +on the same [file system](https://en.wikipedia.org/wiki/File_system), if they +are not already. This can be done in several ways: + +- Temporarily reconfiguring `env_parent` in + [`_asv_delegated_abc`](_asv_delegated_abc.py) to reference a location on the same + file system as the Conda package cache. +- Using an alternative Conda package cache location during the benchmark run, + e.g. via the `$CONDA_PKGS_DIRS` environment variable. +- Moving your repo checkout to the same file system as the Conda package cache. + +### Environment variables + +* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use +`iris-test-data` content, and your local `site.cfg` is not available for +benchmark scripts. The benchmark runner defers to any value already set in +the shell, but will otherwise download `iris-test-data` and set the variable +accordingly. +* `DATA_GEN_PYTHON` - required - path to a Python executable that can be +used to generate benchmark test objects/files; see +[Data generation](#data-generation). The benchmark runner sets this +automatically, but will defer to any value already set in the shell. 
Note that +[Mule](https://github.com/metomi/mule) will be automatically installed into +this environment, and sometimes +[iris-test-data](https://github.com/SciTools/iris-test-data) (see +`OVERRIDE_TEST_DATA_REPOSITORY`). +* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic +test data, which the benchmark scripts will create if it doesn't already +exist. Defaults to `/benchmarks/.data/` if not set. Note that some of +the generated files, especially in the 'SPerf' suite, are many GB in size so +plan accordingly. +* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks +decorated with `@on_demand_benchmark` are included in the ASV run. Usually +coupled with the ASV `--bench` argument to only run the benchmark(s) of +interest. Is set during the benchmark runner `cperf` and `sperf` sub-commands. +* `ASV_COMMIT_ENVS` - optional - instruct the +[delegated environment management](#benchmark-environments) to create a +dedicated environment for each commit being benchmarked when set (to any +value). This means that benchmarking commits with different environment +requirements will not be delayed by repeated environment setup - especially +relevant given the [benchmark runner](bm_runner.py)'s use of +[--interleave-rounds](https://asv.readthedocs.io/en/stable/commands.html?highlight=interleave-rounds#asv-run), +or any time you know you will repeatedly benchmark the same commit. **NOTE:** +SciTools environments tend to be large so this option can consume a lot of disk +space. + +## Writing benchmarks + +[See the ASV docs](https://asv.readthedocs.io/) for full detail. + +### What benchmarks to write + +It is not possible to maintain a full suite of 'unit style' benchmarks: + +* Benchmarks take longer to run than tests. +* Small benchmarks are more vulnerable to noise - they report a lot of false +positive regressions.
+ +We therefore recommend writing benchmarks representing scripts or single +operations that are likely to be run at the user level. + +The drawback of this approach: a reported regression is less likely to reveal +the root cause (e.g. if a commit caused a regression in coordinate-creation +time, but the only benchmark covering this was for file-loading). Be prepared +for manual investigations; and consider committing any useful benchmarks as +[on-demand benchmarks](#on-demand-benchmarks) for future developers to use. + +### Data generation + +**Important:** be sure not to use the benchmarking environment to generate any +test objects/files, as this environment changes with each commit being +benchmarked, creating inconsistent benchmark 'conditions'. The +[generate_data](./benchmarks/generate_data/__init__.py) module offers a +solution; read more detail there. + +### ASV re-run behaviour + +Note that ASV re-runs a benchmark multiple times between its `setup()` routine. +This is a problem for benchmarking certain SciTools operations such as data +realisation, since the data will no longer be lazy after the first run. +Consider writing extra steps to restore objects' original state _within_ the +benchmark itself. + +If adding steps to the benchmark will skew the result too much then re-running +can be disabled by setting an attribute on the benchmark: `number = 1`. To +maintain result accuracy this should be accompanied by increasing the number of +repeats _between_ `setup()` calls using the `repeat` attribute. +`warmup_time = 0` is also advisable since ASV performs independent re-runs to +estimate run-time, and these will still be subject to the original problem. +The `@disable_repeat_between_setup` decorator in +[`benchmarks/__init__.py`](benchmarks/__init__.py) offers a convenience for +all this. + +### Custom benchmarks + +SciTools benchmarking implements custom benchmark types, such as a `tracemalloc` +benchmark to measure memory growth. 
See [custom_bms/](./custom_bms) for more +detail. + +### Scaling / non-Scaling Performance Differences + +**(We no longer advocate the below for benchmarks run during CI, given the +limited available runtime and risk of false-positives. It remains useful for +manual investigations).** + +When comparing performance between commits/file-type/whatever it can be helpful +to know if the differences exist in scaling or non-scaling parts of the +operation under test. This can be done using a size parameter, setting +one value to be as small as possible (e.g. a scalar value), and the other to +be significantly larger (e.g. a 1000x1000 array). Performance differences +might only be seen for the larger value, or the smaller, or both, getting you +closer to the root cause. + +### On-demand benchmarks + +Some benchmarks provide useful insight but are inappropriate to be included in +a benchmark run by default, e.g. those with long run-times or requiring a local +file. These benchmarks should be decorated with `@on_demand_benchmark` +(see [benchmarks init](./benchmarks/__init__.py)), which +sets the benchmark to only be included in a run when the `ON_DEMAND_BENCHMARKS` +environment variable is set. Examples include the CPerf and SPerf benchmark +suites for the UK Met Office NG-VAT project. + +## Benchmark environments + +We have disabled ASV's standard environment management, instead using an +environment built using the same scripts that set up the package test +environments. +This is done using ASV's plugin architecture - see +[`asv_delegated.py`](asv_delegated.py) and associated +references in [`asv.conf.json`](asv.conf.json) (`environment_type` and +`plugins`). + +(ASV is written to control the environment(s) that benchmarks are run in - +minimising external factors and also allowing it to compare between a matrix +of dependencies (each in a separate environment). 
We have chosen to sacrifice +these features in favour of testing each commit with its intended dependencies, +controlled by the test environment setup script(s)). diff --git a/benchmarks/_asv_delegated_abc.py b/benchmarks/_asv_delegated_abc.py new file mode 100644 index 00000000000..0546a3c6a2d --- /dev/null +++ b/benchmarks/_asv_delegated_abc.py @@ -0,0 +1,249 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""ASV plug-in providing an alternative :class:`asv.environments.Environment` subclass. + +Preps an environment via custom user scripts, then uses that as the +benchmarking environment. + +This module is intended as the generic code that can be shared between +repositories. Providing a functional benchmarking environment relies on correct +subclassing of the :class:`_DelegatedABC` class to specialise it for the repo in +question. The parent and subclass are separated into their own dedicated files, +which isolates ALL repo-specific code to a single file, thus simplifying the +templating process. + +""" + +from abc import ABC, abstractmethod +from contextlib import contextmanager, suppress +from os import environ +from pathlib import Path +import sys + +from asv.console import log +from asv.environment import Environment, EnvironmentUnavailable +from asv.repo import Repo + + +class _DelegatedABC(Environment, ABC): + """Manage a benchmark environment using custom user scripts, run at each commit. + + Ignores user input variations - ``matrix`` / ``pythons`` / + ``exclude``, since environment is being managed outside ASV. + + A vanilla :class:`asv.environment.Environment` is created for containing + the expected ASV configuration files and checked-out project. The actual + 'functional' environment is created/updated using + :meth:`_prep_env_override`, then the location is recorded via + a symlink within the ASV environment. 
The symlink is used as the + environment path used for any executable calls (e.g. + ``python my_script.py``). + + Intended as the generic parent class that can be shared between + repositories. Providing a functional benchmarking environment relies on + correct subclassing of this class to specialise it for the repo in question. + + Warnings + -------- + :class:`_DelegatedABC` is an abstract base class. It MUST ONLY be used via + subclasses implementing their own :meth:`_prep_env_override`, and also + :attr:`tool_name`, which must be unique. + + """ + + tool_name = "delegated-ABC" + """Required by ASV as a unique identifier of the environment type.""" + + DELEGATED_LINK_NAME = "delegated_env" + """The name of the symlink to the delegated environment.""" + + COMMIT_ENVS_VAR = "ASV_COMMIT_ENVS" + """Env var that instructs a dedicated environment be created per commit.""" + + def __init__(self, conf, python, requirements, tagged_env_vars): + """Get a 'delegated' environment based on the given ASV config object. + + Parameters + ---------- + conf : dict + ASV configuration object. + + python : str + Ignored - environment management is delegated. The value is always + ``DELEGATED``. + + requirements : dict (str -> str) + Ignored - environment management is delegated. The value is always + an empty dict. + + tagged_env_vars : dict (tag, key) -> value + Ignored - environment management is delegated. The value is always + an empty dict. + + Raises + ------ + EnvironmentUnavailable + The original environment or delegated environment cannot be created. + + """ + ignored = [] + if python: + ignored.append(f"{python=}") + if requirements: + ignored.append(f"{requirements=}") + if tagged_env_vars: + ignored.append(f"{tagged_env_vars=}") + message = ( + f"Ignoring ASV setting(s): {', '.join(ignored)}. Benchmark " + "environment management is delegated to third party script(s)." 
+ ) + log.warning(message) + self._python = "DELEGATED" + self._requirements = {} + self._tagged_env_vars = {} + super().__init__( + conf, + self._python, + self._requirements, + self._tagged_env_vars, + ) + + self._path_undelegated = Path(self._path) + """Preserves the 'true' path of the environment so that self._path can + be safely modified and restored.""" + + @property + def _path_delegated(self) -> Path: + """The path of the symlink to the delegated environment.""" + return self._path_undelegated / self.DELEGATED_LINK_NAME + + @property + def _delegated_found(self) -> bool: + """Whether self._path_delegated successfully resolves to a directory.""" + resolved = None + with suppress(FileNotFoundError): + resolved = self._path_delegated.resolve(strict=True) + result = resolved is not None and resolved.is_dir() + return result + + def _symlink_to_delegated(self, delegated_env_path: Path) -> None: + """Create the symlink to the delegated environment.""" + self._path_delegated.unlink(missing_ok=True) + self._path_delegated.parent.mkdir(parents=True, exist_ok=True) + self._path_delegated.symlink_to(delegated_env_path, target_is_directory=True) + assert self._delegated_found + + def _setup(self): + """Temporarily try to set the user's active env as the delegated env. + + Environment prep will be run anyway once ASV starts checking out + commits, but this step tries to provide a usable environment (with + python, etc.) at the moment that ASV expects it. + + """ + current_env = Path(sys.executable).parents[1] + message = ( + "Temporarily using user's active environment as benchmarking " + f"environment: {current_env} . " + ) + try: + self._symlink_to_delegated(current_env) + _ = self.find_executable("python") + except Exception: + message = ( + f"Delegated environment {self.name} not yet set up (unable to " + "determine current environment)." 
+ ) + self._path_delegated.unlink(missing_ok=True) + + message += "Correct environment will be set up at the first commit checkout." + log.warning(message) + + @abstractmethod + def _prep_env_override(self, env_parent_dir: Path) -> Path: + """Run aspects of :meth:`_prep_env` that vary between repos. + + This is the method that is expected to do the preparing + (:meth:`_prep_env` only performs pre- and post- steps). MUST be + overridden in any subclass environments before they will work. + + Parameters + ---------- + env_parent_dir : Path + The directory that the prepared environment should be placed in. + + Returns + ------- + Path + The path to the prepared environment. + """ + pass + + def _prep_env(self, commit_hash: str) -> None: + """Prepare the delegated environment for the given commit hash.""" + message = ( + f"Running delegated environment management for: {self.name} " + f"at commit: {commit_hash[:8]}" + ) + log.info(message) + + env_parent = Path(self._env_dir).resolve() + new_env_per_commit = self.COMMIT_ENVS_VAR in environ + if new_env_per_commit: + env_parent = env_parent / commit_hash[:8] + + delegated_env_path = self._prep_env_override(env_parent) + assert delegated_env_path.is_relative_to(env_parent) + + # Record the environment's path via a symlink within this environment. 
+ self._symlink_to_delegated(delegated_env_path) + + message = f"Environment {self.name} updated to spec at {commit_hash[:8]}" + log.info(message) + + def checkout_project(self, repo: Repo, commit_hash: str) -> None: + """Check out the working tree of the project at given commit hash.""" + super().checkout_project(repo, commit_hash) + self._prep_env(commit_hash) + + @contextmanager + def _delegate_path(self): + """Context manager to use the delegated env path as this env's path.""" + if not self._delegated_found: + message = f"Delegated environment not found at: {self._path_delegated}" + log.error(message) + raise EnvironmentUnavailable(message) + + try: + self._path = str(self._path_delegated) + yield + finally: + self._path = str(self._path_undelegated) + + def find_executable(self, executable): + """Find an executable (e.g. python, pip) in the DELEGATED environment. + + Raises + ------ + OSError + If the executable is not found in the environment. + """ + if not self._delegated_found: + # Required during environment setup. OSError expected if executable + # not found. + raise OSError + + with self._delegate_path(): + return super().find_executable(executable) + + def run_executable(self, executable, args, **kwargs): + """Run a given executable (e.g. python, pip) in the DELEGATED environment.""" + with self._delegate_path(): + return super().run_executable(executable, args, **kwargs) + + def run(self, args, **kwargs): + # This is not a specialisation - just implementing the abstract method. 
+ log.debug(f"Running '{' '.join(args)}' in {self.name}") + return self.run_executable("python", args, **kwargs) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 00000000000..bc0f6e55e35 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,27 @@ +{ + "version": 1, + "project": "scitools-iris", + "project_url": "https://github.com/SciTools/iris", + "repo": "..", + "environment_type": "delegated", + "show_commit_url": "https://github.com/scitools/iris/commit/", + "branches": ["upstream/main"], + + "benchmark_dir": "./benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "plugins": [".asv_delegated"], + + "command_comment": [ + "The inherited setup of the Iris test environment takes care of ", + "Iris-installation too, and in the case of Iris no specialised ", + "uninstall or build commands are needed to get it working either.", + + "We do however need to install the custom benchmarks for them to be", + "usable." + ], + "install_command": [], + "uninstall_command": [], + "build_command": ["python {conf_dir}/custom_bms/install.py"] +} diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py new file mode 100644 index 00000000000..85ba432ac75 --- /dev/null +++ b/benchmarks/asv_delegated.py @@ -0,0 +1,153 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Repository-specific adaptation of :mod:`_asv_delegated_abc`.""" + +import ast +import enum +from os import environ +from os.path import getmtime +from pathlib import Path +import re + +from asv import util as asv_util + +from _asv_delegated_abc import _DelegatedABC + + +class Delegated(_DelegatedABC): + """Specialism of :class:`_DelegatedABC` for benchmarking this repo.""" + + tool_name = "delegated" + + def _prep_env_override(self, env_parent_dir: Path) -> Path: + """Environment preparation specialised for this repo. + + Scans the checked-out commit of Iris to work out the appropriate + preparation command, including gathering any extra information that said + command needs. + + Parameters + ---------- + env_parent_dir : Path + The directory that the prepared environment should be placed in. + + Returns + ------- + Path + The path to the prepared environment. + """ + # The project checkout. + build_dir = Path(self._build_root) / self._repo_subdir + + # Older iterations of setup.py are incompatible with setuptools>=80. + # (Most dependencies are protected by lock-files, but build + # dependencies in pyproject.toml are independent). 
+ setup_py = build_dir / "setup.py" + pyproject = build_dir / "pyproject.toml" + if setup_py.is_file() and "setuptools.command.develop" in setup_py.read_text(): + with pyproject.open("r+") as file_write: + lines = file_write.readlines() + for i, line in enumerate(lines): + if line == "requires = [\n": + next_line = lines[i + 1] + indent = next_line[: len(next_line) - len(next_line.lstrip())] + + lines.insert(i + 1, f'{indent}"setuptools<80",\n') + break + file_write.seek(0) + file_write.writelines(lines) + + class Mode(enum.Enum): + """The scenarios where the correct env setup script is known.""" + + NOX = enum.auto() + """``PY_VER=x.xx nox --session=tests --install-only`` is supported.""" + + mode = None + + noxfile = build_dir / "noxfile.py" + if noxfile.is_file(): + # Our noxfile originally did not support `--install-only` - you + # could either run the tests, or run nothing at all. Adding + # `run_always` to `prepare_venv` enabled environment setup without + # running tests. + noxfile_tree = ast.parse(source=noxfile.read_text()) + prep_session = next( + filter( + lambda node: getattr(node, "name", "") == "prepare_venv", + ast.walk(noxfile_tree), + ) + ) + prep_session_code = ast.unparse(prep_session) + if ( + "session.run(" not in prep_session_code + and "session.run_always(" in prep_session_code + ): + mode = Mode.NOX + + match mode: + # Just NOX for now but the architecture is here for future cases. + case Mode.NOX: + # Need to determine a single Python version to run with. + # req_dir = build_dir / "ci" / "requirements" + # lockfile_dir = req_dir / "locks" + # if not lockfile_dir.is_dir(): + # lockfile_dir = req_dir / "ci" / "nox.lock" + + # if not lockfile_dir.is_dir(): + # message = f"No lockfile directory found in the expected locations, got '{lockfile_dir}'." 
+ # raise FileNotFoundError(message) + + # def py_ver_from_lockfiles(lockfile: Path) -> str: + # pattern = re.compile(r"py(\d+)-") + # search = pattern.search(lockfile.name) + # assert search is not None + # version = search.group(1) + # return f"{version[0]}.{version[1:]}" + + # python_versions = [ + # py_ver_from_lockfiles(lockfile) + # for lockfile in lockfile_dir.glob("*.lock") + # ] + # python_version = max(python_versions) + python_version = "3.13" + + # Construct and run the environment preparation command. + local_envs = dict(environ) + local_envs["PY_VER"] = python_version + # Prevent Nox re-using env with wrong Python version. + env_parent_dir = ( + env_parent_dir / f"nox{python_version.replace('.', '')}" + ) + env_command = [ + "nox", + f"--envdir={env_parent_dir}", + "--session=tests", + "--install-only", + "--no-error-on-external-run", + "--verbose", + ] + _ = asv_util.check_output( + env_command, + timeout=self._install_timeout, + cwd=build_dir, + env=local_envs, + ) + + env_parent_contents = list(env_parent_dir.iterdir()) + if len(env_parent_contents) != 1: + message = ( + f"{env_parent_dir} contains {len(env_parent_contents)} " + "items, expected 1. Cannot determine the environment " + "directory." + ) + raise FileNotFoundError(message) + (delegated_env_path,) = env_parent_contents + + case _: + message = "No environment setup is known for this commit of Iris." 
+ raise NotImplementedError(message) + + return delegated_env_path diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 00000000000..b8a54f71a42 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1,74 @@ +import itertools +import os + +import numpy as np + +_counter = itertools.count() + + +def parameterized(names, params): + def decorator(func): + func.param_names = names + func.params = params + return func + + return decorator + + +def requires_dask(): + try: + import dask # noqa: F401 + except ImportError as err: + raise NotImplementedError() from err + + +def requires_sparse(): + try: + import sparse # noqa: F401 + except ImportError as err: + raise NotImplementedError() from err + + +def randn(shape, frac_nan=None, chunks=None, seed=0): + rng = np.random.default_rng(seed) + if chunks is None: + x = rng.standard_normal(shape) + else: + import dask.array as da + + rng = da.random.default_rng(seed) + x = rng.standard_normal(shape, chunks=chunks) + + if frac_nan is not None: + inds = rng.choice(range(x.size), int(x.size * frac_nan)) + x.flat[inds] = np.nan + + return x + + +def randint(low, high=None, size=None, frac_minus=None, seed=0): + rng = np.random.default_rng(seed) + x = rng.integers(low, high, size) + if frac_minus is not None: + inds = rng.choice(range(x.size), int(x.size * frac_minus)) + x.flat[inds] = -1 + + return x + + +def _skip_slow(): + """ + Use this function to skip slow or highly demanding tests. + + Use it as a `Class.setup` method or a `function.setup` attribute. + + Examples + -------- + >>> from . import _skip_slow + >>> def time_something_slow(): + ... pass + ... 
+ >>> time_something.setup = _skip_slow + """ + if os.environ.get("ASV_SKIP_SLOW", "0") == "1": + raise NotImplementedError("Skipping this test...") diff --git a/benchmarks/benchmarks/accessors.py b/benchmarks/benchmarks/accessors.py new file mode 100644 index 00000000000..259c06160ac --- /dev/null +++ b/benchmarks/benchmarks/accessors.py @@ -0,0 +1,25 @@ +import numpy as np + +import xarray as xr + +from . import parameterized + +NTIME = 365 * 30 + + +@parameterized(["calendar"], [("standard", "noleap")]) +class DateTimeAccessor: + def setup(self, calendar): + np.random.randn(NTIME) + time = xr.date_range("2000", periods=30 * 365, calendar=calendar) + data = np.ones((NTIME,)) + self.da = xr.DataArray(data, dims="time", coords={"time": time}) + + def time_dayofyear(self, calendar): + _ = self.da.time.dt.dayofyear + + def time_year(self, calendar): + _ = self.da.time.dt.year + + def time_floor(self, calendar): + _ = self.da.time.dt.floor("D") diff --git a/benchmarks/benchmarks/alignment.py b/benchmarks/benchmarks/alignment.py new file mode 100644 index 00000000000..5a6ee3fa0a6 --- /dev/null +++ b/benchmarks/benchmarks/alignment.py @@ -0,0 +1,54 @@ +import numpy as np + +import xarray as xr + +from . 
import parameterized, requires_dask + +ntime = 365 * 30 +nx = 50 +ny = 50 + +rng = np.random.default_rng(0) + + +class Align: + def setup(self, *args, **kwargs): + data = rng.standard_normal((ntime, nx, ny)) + self.ds = xr.Dataset( + {"temperature": (("time", "x", "y"), data)}, + coords={ + "time": xr.date_range("2000", periods=ntime), + "x": np.arange(nx), + "y": np.arange(ny), + }, + ) + self.year = self.ds.time.dt.year + self.idx = np.unique(rng.integers(low=0, high=ntime, size=ntime // 2)) + self.year_subset = self.year.isel(time=self.idx) + + @parameterized(["join"], [("outer", "inner", "left", "right", "exact", "override")]) + def time_already_aligned(self, join): + xr.align(self.ds, self.year, join=join) + + @parameterized(["join"], [("outer", "inner", "left", "right")]) + def time_not_aligned(self, join): + xr.align(self.ds, self.year[-100:], join=join) + + @parameterized(["join"], [("outer", "inner", "left", "right")]) + def time_not_aligned_random_integers(self, join): + xr.align(self.ds, self.year_subset, join=join) + + +class AlignCFTime(Align): + def setup(self, *args, **kwargs): + super().setup() + self.ds["time"] = xr.date_range("2000", periods=ntime, calendar="noleap") + self.year = self.ds.time.dt.year + self.year_subset = self.year.isel(time=self.idx) + + +class AlignDask(Align): + def setup(self, *args, **kwargs): + requires_dask() + super().setup() + self.ds = self.ds.chunk({"time": 100}) diff --git a/benchmarks/benchmarks/coding.py b/benchmarks/benchmarks/coding.py new file mode 100644 index 00000000000..c39555243c0 --- /dev/null +++ b/benchmarks/benchmarks/coding.py @@ -0,0 +1,18 @@ +import numpy as np + +import xarray as xr + +from . 
import parameterized + + +@parameterized(["calendar"], [("standard", "noleap")]) +class EncodeCFDatetime: + def setup(self, calendar): + self.units = "days since 2000-01-01" + self.dtype = np.dtype("int64") + self.times = xr.date_range( + "2000", freq="D", periods=10000, calendar=calendar + ).values + + def time_encode_cf_datetime(self, calendar): + xr.coding.times.encode_cf_datetime(self.times, self.units, calendar, self.dtype) diff --git a/benchmarks/benchmarks/combine.py b/benchmarks/benchmarks/combine.py new file mode 100644 index 00000000000..772d888306c --- /dev/null +++ b/benchmarks/benchmarks/combine.py @@ -0,0 +1,79 @@ +import numpy as np + +import xarray as xr + +from . import requires_dask + + +class Combine1d: + """Benchmark concatenating and merging large datasets""" + + def setup(self) -> None: + """Create 2 datasets with two different variables""" + + t_size = 8000 + t = np.arange(t_size) + data = np.random.randn(t_size) + + self.dsA0 = xr.Dataset({"A": xr.DataArray(data, coords={"T": t}, dims=("T"))}) + self.dsA1 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T"))} + ) + + def time_combine_by_coords(self) -> None: + """Also has to load and arrange t coordinate""" + datasets = [self.dsA0, self.dsA1] + + xr.combine_by_coords(datasets) + + +class Combine1dDask(Combine1d): + """Benchmark concatenating and merging large datasets""" + + def setup(self) -> None: + """Create 2 datasets with two different variables""" + requires_dask() + + t_size = 8000 + t = np.arange(t_size) + var = xr.Variable(dims=("T",), data=np.random.randn(t_size)).chunk() + + data_vars = {f"long_name_{v}": ("T", var) for v in range(500)} + + self.dsA0 = xr.Dataset(data_vars, coords={"T": t}) + self.dsA1 = xr.Dataset(data_vars, coords={"T": t + t_size}) + + +class Combine3d: + """Benchmark concatenating and merging large datasets""" + + def setup(self): + """Create 4 datasets with two different variables""" + + t_size, x_size, y_size = 50, 450, 400 + t = 
np.arange(t_size) + data = np.random.randn(t_size, x_size, y_size) + + self.dsA0 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} + ) + self.dsA1 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} + ) + self.dsB0 = xr.Dataset( + {"B": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} + ) + self.dsB1 = xr.Dataset( + {"B": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} + ) + + def time_combine_nested(self): + datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]] + + xr.combine_nested(datasets, concat_dim=[None, "T"]) + + def time_combine_by_coords(self): + """Also has to load and arrange t coordinate""" + datasets = [self.dsA0, self.dsA1, self.dsB0, self.dsB1] + + xr.combine_by_coords(datasets) diff --git a/benchmarks/benchmarks/dataarray_missing.py b/benchmarks/benchmarks/dataarray_missing.py new file mode 100644 index 00000000000..83de65b7fe4 --- /dev/null +++ b/benchmarks/benchmarks/dataarray_missing.py @@ -0,0 +1,72 @@ +import pandas as pd + +import xarray as xr + +from . 
import parameterized, randn, requires_dask + + +def make_bench_data(shape, frac_nan, chunks): + vals = randn(shape, frac_nan) + coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} + da = xr.DataArray(vals, dims=("time", "x", "y"), coords=coords) + + if chunks is not None: + da = da.chunk(chunks) + + return da + + +class DataArrayMissingInterpolateNA: + def setup(self, shape, chunks, limit): + if chunks is not None: + requires_dask() + self.da = make_bench_data(shape, 0.1, chunks) + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_interpolate_na(self, shape, chunks, limit): + actual = self.da.interpolate_na(dim="time", method="linear", limit=limit) + + if chunks is not None: + actual = actual.compute() + + +class DataArrayMissingBottleneck: + def setup(self, shape, chunks, limit): + if chunks is not None: + requires_dask() + self.da = make_bench_data(shape, 0.1, chunks) + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_ffill(self, shape, chunks, limit): + actual = self.da.ffill(dim="time", limit=limit) + + if chunks is not None: + actual = actual.compute() + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_bfill(self, shape, chunks, limit): + actual = self.da.bfill(dim="time", limit=limit) + + if chunks is not None: + actual = actual.compute() diff --git a/benchmarks/benchmarks/dataset.py b/benchmarks/benchmarks/dataset.py new file mode 100644 index 00000000000..d8a6d6df9d8 --- /dev/null +++ b/benchmarks/benchmarks/dataset.py @@ -0,0 +1,32 @@ +import numpy as np + +from xarray import Dataset + +from . 
import requires_dask + + +class DatasetBinaryOp: + def setup(self): + self.ds = Dataset( + { + "a": (("x", "y"), np.ones((300, 400))), + "b": (("x", "y"), np.ones((300, 400))), + } + ) + self.mean = self.ds.mean() + self.std = self.ds.std() + + def time_normalize(self): + (self.ds - self.mean) / self.std + + +class DatasetChunk: + def setup(self): + requires_dask() + self.ds = Dataset() + array = np.ones(1000) + for i in range(250): + self.ds[f"var{i}"] = ("x", array) + + def time_chunk(self): + self.ds.chunk(x=(1,) * 1000) diff --git a/benchmarks/benchmarks/dataset_io.py b/benchmarks/benchmarks/dataset_io.py new file mode 100644 index 00000000000..b8afabe802e --- /dev/null +++ b/benchmarks/benchmarks/dataset_io.py @@ -0,0 +1,755 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass + +import numpy as np +import pandas as pd + +import xarray as xr + +from . import _skip_slow, parameterized, randint, randn, requires_dask + +try: + import dask + import dask.multiprocessing +except ImportError: + pass + +os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" + +_ENGINES = tuple(xr.backends.list_engines().keys() - {"store"}) + + +class IOSingleNetCDF: + """ + A few examples that benchmark reading/writing a single netCDF file with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_ds(self): + # single Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + times = pd.date_range("1970-01-01", periods=self.nt, freq="D") + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + self.ds["foo"] = xr.DataArray( + randn((self.nt, self.nx, 
self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + self.ds["bar"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + self.ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + self.ds.attrs = {"history": "created for xarray benchmarking"} + + self.oinds = { + "time": randint(0, self.nt, 120), + "lon": randint(0, self.nx, 20), + "lat": randint(0, self.ny, 10), + } + self.vinds = { + "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), + "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), + "lat": slice(3, 20), + } + + +class IOWriteSingleNetCDF3(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.format = "NETCDF3_64BIT" + self.make_ds() + + def time_write_dataset_netcdf4(self): + self.ds.to_netcdf("test_netcdf4_write.nc", engine="netcdf4", format=self.format) + + def time_write_dataset_scipy(self): + self.ds.to_netcdf("test_scipy_write.nc", engine="scipy", format=self.format) + + +class IOReadSingleNetCDF4(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + self.make_ds() + + self.filepath = "test_single_file.nc4.nc" + self.format = "NETCDF4" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_netcdf4(self): + xr.open_dataset(self.filepath, engine="netcdf4").load() + + def time_orthogonal_indexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4") + ds = ds.isel(**self.oinds).load() + + def time_vectorized_indexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4") + ds = ds.isel(**self.vinds).load() + + +class IOReadSingleNetCDF3(IOReadSingleNetCDF4): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.make_ds() + + self.filepath = "test_single_file.nc3.nc" + self.format = "NETCDF3_64BIT" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_scipy(self): + xr.open_dataset(self.filepath, engine="scipy").load() + + def time_orthogonal_indexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy") + ds = ds.isel(**self.oinds).load() + + def time_vectorized_indexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy") + ds = ds.isel(**self.vinds).load() + + +class IOReadSingleNetCDF4Dask(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + self.make_ds() + + self.filepath = "test_single_file.nc4.nc" + self.format = "NETCDF4" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_netcdf4_with_block_chunks(self): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_block_chunks_oindexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) + ds = ds.isel(**self.oinds).load() + + def time_load_dataset_netcdf4_with_block_chunks_vindexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) + ds = ds.isel(**self.vinds).load() + + def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks(self): + xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.time_chunks).load() + + def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.time_chunks + ).load() + + +class IOReadSingleNetCDF3Dask(IOReadSingleNetCDF4Dask): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + self.make_ds() + + self.filepath = "test_single_file.nc3.nc" + self.format = "NETCDF3_64BIT" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="scipy", chunks=self.block_chunks + ).load() + + def time_load_dataset_scipy_with_block_chunks_oindexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) + ds = ds.isel(**self.oinds).load() + + def time_load_dataset_scipy_with_block_chunks_vindexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) + ds = ds.isel(**self.vinds).load() + + def time_load_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="scipy", chunks=self.time_chunks + ).load() + + +class IOMultipleNetCDF: + """ + A few examples that benchmark reading/writing multiple netCDF files with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_ds(self, nfiles=10): + # multiple Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + self.nfiles = nfiles + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + self.time_vars = np.split( + pd.date_range("1970-01-01", periods=self.nt, freq="D"), self.nfiles + ) + + self.ds_list = [] + self.filenames_list = [] + for i, times in enumerate(self.time_vars): + ds = xr.Dataset() + nt = len(times) + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + ds["foo"] = xr.DataArray( + randn((nt, self.nx, self.ny), 
frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + ds["bar"] = xr.DataArray( + randn((nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + ds.attrs = {"history": "created for xarray benchmarking"} + + self.ds_list.append(ds) + self.filenames_list.append(f"test_netcdf_{i}.nc") + + +class IOWriteMultipleNetCDF3(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.make_ds() + self.format = "NETCDF3_64BIT" + + def time_write_dataset_netcdf4(self): + xr.save_mfdataset( + self.ds_list, self.filenames_list, engine="netcdf4", format=self.format + ) + + def time_write_dataset_scipy(self): + xr.save_mfdataset( + self.ds_list, self.filenames_list, engine="scipy", format=self.format + ) + + +class IOReadMultipleNetCDF4(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF4" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_netcdf4(self): + xr.open_mfdataset(self.filenames_list, engine="netcdf4").load() + + def time_open_dataset_netcdf4(self): + xr.open_mfdataset(self.filenames_list, engine="netcdf4") + + +class IOReadMultipleNetCDF3(IOReadMultipleNetCDF4): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. 
+ # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF3_64BIT" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_scipy(self): + xr.open_mfdataset(self.filenames_list, engine="scipy").load() + + def time_open_dataset_scipy(self): + xr.open_mfdataset(self.filenames_list, engine="scipy") + + +class IOReadMultipleNetCDF4Dask(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF4" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_netcdf4_with_block_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ).load() + + def time_open_dataset_netcdf4_with_block_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ) + + def time_open_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ) + + def time_open_dataset_netcdf4_with_time_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", 
chunks=self.time_chunks + ) + + def time_open_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ) + + +class IOReadMultipleNetCDF3Dask(IOReadMultipleNetCDF4Dask): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF3_64BIT" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.block_chunks + ).load() + + def time_load_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.time_chunks + ).load() + + def time_open_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.block_chunks + ) + + def time_open_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.time_chunks + ) + + +def create_delayed_write(): + import dask.array as da + + vals = da.random.random(300, chunks=(1,)) + ds = xr.Dataset({"vals": (["a"], vals)}) + return ds.to_netcdf("file.nc", engine="netcdf4", compute=False) + + +class IONestedDataTree: + """ + A few examples that benchmark reading/writing a heavily nested netCDF datatree with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_datatree(self, nchildren=10): + # multiple Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + self.nchildren = nchildren + + self.block_chunks = { + "time": 
self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + times = pd.date_range("1970-01-01", periods=self.nt, freq="D") + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + self.ds["foo"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + self.ds["bar"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + self.ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + self.ds.attrs = {"history": "created for xarray benchmarking"} + + self.oinds = { + "time": randint(0, self.nt, 120), + "lon": randint(0, self.nx, 20), + "lat": randint(0, self.ny, 10), + } + self.vinds = { + "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), + "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), + "lat": slice(3, 20), + } + root = {f"group_{group}": self.ds for group in range(self.nchildren)} + nested_tree1 = { + f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren) + } + nested_tree2 = { + f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset( + name="a" + ) + for group in range(self.nchildren) + } + nested_tree3 = { + f"group_{group}/subgroup_2/sub-subgroup_1": self.ds + for group in range(self.nchildren) + } + dtree = root 
| nested_tree1 | nested_tree2 | nested_tree3 + self.dtree = xr.DataTree.from_dict(dtree) + + +class IOReadDataTreeNetCDF4(IONestedDataTree): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_datatree() + self.format = "NETCDF4" + self.filepath = "datatree.nc4.nc" + dtree = self.dtree + dtree.to_netcdf(filepath=self.filepath) + + def time_load_datatree_netcdf4(self): + xr.open_datatree(self.filepath, engine="netcdf4").load() + + def time_open_datatree_netcdf4(self): + xr.open_datatree(self.filepath, engine="netcdf4") + + +class IOWriteNetCDFDask: + timeout = 60 + repeat = 1 + number = 5 + + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.write = create_delayed_write() + + def time_write(self): + self.write.compute() + + +class IOWriteNetCDFDaskDistributed: + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + try: + import distributed + except ImportError as err: + raise NotImplementedError() from err + + self.client = distributed.Client() + self.write = create_delayed_write() + + def cleanup(self): + self.client.shutdown() + + def time_write(self): + self.write.compute() + + +class IOReadSingleFile(IOSingleNetCDF): + def setup(self, *args, **kwargs): + self.make_ds() + + self.filepaths = {} + for engine in _ENGINES: + self.filepaths[engine] = f"test_single_file_with_{engine}.nc" + self.ds.to_netcdf(self.filepaths[engine], engine=engine) + + @parameterized(["engine", "chunks"], (_ENGINES, [None, {}])) + def time_read_dataset(self, engine, chunks): + xr.open_dataset(self.filepaths[engine], engine=engine, chunks=chunks) + + +class IOReadCustomEngine: + def setup(self, *args, **kwargs): + """ + The custom backend does the bare minimum to be considered a lazy backend. But + the data in it is still in memory so slow file reading shouldn't affect the + results. + """ + requires_dask() + + @dataclass + class PerformanceBackendArray(xr.backends.BackendArray): + filename_or_obj: str | os.PathLike | None + shape: tuple[int, ...] 
+ dtype: np.dtype + lock: xr.backends.locks.SerializableLock + + def __getitem__(self, key: tuple): + return xr.core.indexing.explicit_indexing_adapter( + key, + self.shape, + xr.core.indexing.IndexingSupport.BASIC, + self._raw_indexing_method, + ) + + def _raw_indexing_method(self, key: tuple): + raise NotImplementedError + + @dataclass + class PerformanceStore(xr.backends.common.AbstractWritableDataStore): + manager: xr.backends.CachingFileManager + mode: str | None = None + lock: xr.backends.locks.SerializableLock | None = None + autoclose: bool = False + + def __post_init__(self): + self.filename = self.manager._args[0] + + @classmethod + def open( + cls, + filename: str | os.PathLike | None, + mode: str = "r", + lock: xr.backends.locks.SerializableLock | None = None, + autoclose: bool = False, + ): + locker = lock or xr.backends.locks.SerializableLock() + + manager = xr.backends.CachingFileManager( + xr.backends.DummyFileManager, + filename, + mode=mode, + ) + return cls(manager, mode=mode, lock=locker, autoclose=autoclose) + + def load(self) -> tuple: + """ + Load a bunch of test data quickly. + + Normally this method would've opened a file and parsed it. + """ + n_variables = 2000 + + # Important to have a shape and dtype for lazy loading. + shape = (1000,) + dtype = np.dtype(int) + variables = { + f"long_variable_name_{v}": xr.Variable( + data=PerformanceBackendArray( + self.filename, shape, dtype, self.lock + ), + dims=("time",), + fastpath=True, + ) + for v in range(n_variables) + } + attributes = {} + + return variables, attributes + + class PerformanceBackend(xr.backends.BackendEntrypoint): + def open_dataset( + self, + filename_or_obj: str | os.PathLike | None, + drop_variables: tuple[str, ...] 
| None = None, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + use_cftime=None, + decode_timedelta=None, + lock=None, + **kwargs, + ) -> xr.Dataset: + filename_or_obj = xr.backends.common._normalize_path(filename_or_obj) + store = PerformanceStore.open(filename_or_obj, lock=lock) + + store_entrypoint = xr.backends.store.StoreBackendEntrypoint() + + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds + + self.engine = PerformanceBackend + + @parameterized(["chunks"], ([None, {}, {"time": 10}])) + def time_open_dataset(self, chunks): + """ + Time how fast xr.open_dataset is without the slow data reading part. + Test with and without dask. + """ + xr.open_dataset(None, engine=self.engine, chunks=chunks) diff --git a/benchmarks/benchmarks/datatree.py b/benchmarks/benchmarks/datatree.py new file mode 100644 index 00000000000..9f1774f60ac --- /dev/null +++ b/benchmarks/benchmarks/datatree.py @@ -0,0 +1,15 @@ +import xarray as xr +from xarray.core.datatree import DataTree + + +class Datatree: + def setup(self): + run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})}) + self.d_few = {"run1": run1} + self.d_many = {f"run{i}": xr.Dataset({"a": 1}) for i in range(100)} + + def time_from_dict_few(self): + DataTree.from_dict(self.d_few) + + def time_from_dict_many(self): + DataTree.from_dict(self.d_many) diff --git a/benchmarks/benchmarks/groupby.py b/benchmarks/benchmarks/groupby.py new file mode 100644 index 00000000000..681fd6ed734 --- /dev/null +++ b/benchmarks/benchmarks/groupby.py @@ -0,0 +1,191 @@ +# import flox to avoid the cost of first import +import cftime +import flox.xarray # noqa: F401 +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import _skip_slow, parameterized, requires_dask + + +class GroupBy: + def setup(self, *args, **kwargs): + self.n = 100 + self.ds1d = xr.Dataset( + { + "a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]), + "b": xr.DataArray(np.arange(2 * self.n)), + "c": xr.DataArray(np.arange(2 * self.n)), + } + ) + self.ds2d = self.ds1d.expand_dims(z=10).copy() + self.ds1d_mean = self.ds1d.groupby("b").mean() + self.ds2d_mean = self.ds2d.groupby("b").mean() + + @parameterized(["ndim"], [(1, 2)]) + def time_init(self, ndim): + getattr(self, f"ds{ndim}d").groupby("b") + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_small_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.groupby("a"), method)().compute() + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_large_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.groupby("b"), method)().compute() + + def time_binary_op_1d(self): + (self.ds1d.groupby("b") - self.ds1d_mean).compute() + + def time_binary_op_2d(self): + (self.ds2d.groupby("b") - self.ds2d_mean).compute() + + def peakmem_binary_op_1d(self): + (self.ds1d.groupby("b") - self.ds1d_mean).compute() + + def peakmem_binary_op_2d(self): + (self.ds2d.groupby("b") - self.ds2d_mean).compute() + + +class GroupByDask(GroupBy): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + + self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)) + self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50}) + self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)) + self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5}) + self.ds1d_mean = self.ds1d.groupby("b").mean().compute() + self.ds2d_mean = self.ds2d.groupby("b").mean().compute() + + +# TODO: These don't work now 
because we are calling `.compute` explicitly. +class GroupByPandasDataFrame(GroupBy): + """Run groupby tests using pandas DataFrame.""" + + def setup(self, *args, **kwargs): + # Skip testing in CI as it won't ever change in a commit: + _skip_slow() + + super().setup(**kwargs) + self.ds1d = self.ds1d.to_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean() + + def time_binary_op_2d(self): + raise NotImplementedError + + def peakmem_binary_op_2d(self): + raise NotImplementedError + + +class GroupByDaskDataFrame(GroupBy): + """Run groupby tests using dask DataFrame.""" + + def setup(self, *args, **kwargs): + # Skip testing in CI as it won't ever change in a commit: + _skip_slow() + + requires_dask() + super().setup(**kwargs) + self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dask_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean().compute() + + def time_binary_op_2d(self): + raise NotImplementedError + + def peakmem_binary_op_2d(self): + raise NotImplementedError + + +class Resample: + def setup(self, *args, **kwargs): + self.ds1d = xr.Dataset( + { + "b": ("time", np.arange(365.0 * 24)), + }, + coords={"time": pd.date_range("2001-01-01", freq="h", periods=365 * 24)}, + ) + self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.resample(time="48h").mean() + self.ds2d_mean = self.ds2d.resample(time="48h").mean() + + @parameterized(["ndim"], [(1, 2)]) + def time_init(self, ndim): + getattr(self, f"ds{ndim}d").resample(time="D") + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_small_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.resample(time="3ME"), method)().compute() + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_large_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with 
xr.set_options(use_flox=use_flox): + getattr(ds.resample(time="48h"), method)().compute() + + +class ResampleDask(Resample): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.ds1d = self.ds1d.chunk({"time": 50}) + self.ds2d = self.ds2d.chunk({"time": 50, "z": 4}) + + +class ResampleCFTime(Resample): + def setup(self, *args, **kwargs): + self.ds1d = xr.Dataset( + { + "b": ("time", np.arange(365.0 * 24)), + }, + coords={ + "time": xr.date_range( + "2001-01-01", freq="h", periods=365 * 24, calendar="noleap" + ) + }, + ) + self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.resample(time="48h").mean() + self.ds2d_mean = self.ds2d.resample(time="48h").mean() + + +@parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]]) +class GroupByLongTime: + def setup(self, use_cftime, use_flox): + arr = np.random.randn(10, 10, 365 * 30) + time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime) + + # GH9426 - deep-copying CFTime object arrays is weirdly slow + asda = xr.DataArray(time) + labeled_time = [] + for year, month in zip(asda.dt.year, asda.dt.month, strict=True): + labeled_time.append(cftime.datetime(year, month, 1)) + + self.da = xr.DataArray( + arr, + dims=("y", "x", "time"), + coords={"time": time, "time2": ("time", labeled_time)}, + ) + + def time_setup(self, use_cftime, use_flox): + self.da.groupby("time.month") + + def time_mean(self, use_cftime, use_flox): + with xr.set_options(use_flox=use_flox): + self.da.groupby("time.year").mean() diff --git a/benchmarks/benchmarks/import.py b/benchmarks/benchmarks/import.py new file mode 100644 index 00000000000..f9d0bcc336b --- /dev/null +++ b/benchmarks/benchmarks/import.py @@ -0,0 +1,18 @@ +class Import: + """Benchmark importing xarray""" + + def timeraw_import_xarray(self): + return "import xarray" + + def timeraw_import_xarray_plot(self): + return "import xarray.plot" + + def timeraw_import_xarray_backends(self): + return """ + from 
xarray.backends import list_engines + list_engines() + """ + + def timeraw_import_xarray_only(self): + # import numpy and pandas in the setup stage + return "import xarray", "import numpy, pandas" diff --git a/benchmarks/benchmarks/indexing.py b/benchmarks/benchmarks/indexing.py new file mode 100644 index 00000000000..50bb8a5ee99 --- /dev/null +++ b/benchmarks/benchmarks/indexing.py @@ -0,0 +1,201 @@ +import os + +import numpy as np +import pandas as pd + +import xarray as xr + +from . import parameterized, randint, randn, requires_dask + +nx = 2000 +ny = 1000 +nt = 500 + +basic_indexes = { + "1scalar": {"x": 0}, + "1slice": {"x": slice(0, 3)}, + "1slice-1scalar": {"x": 0, "y": slice(None, None, 3)}, + "2slicess-1scalar": {"x": slice(3, -3, 3), "y": 1, "t": slice(None, -3, 3)}, +} + +basic_assignment_values = { + "1scalar": 0, + "1slice": xr.DataArray(randn((3, ny), frac_nan=0.1), dims=["x", "y"]), + "1slice-1scalar": xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=["y"]), + "2slicess-1scalar": xr.DataArray( + randn(np.empty(nx)[slice(3, -3, 3)].size, frac_nan=0.1), dims=["x"] + ), +} + +outer_indexes = { + "1d": {"x": randint(0, nx, 400)}, + "2d": {"x": randint(0, nx, 500), "y": randint(0, ny, 400)}, + "2d-1scalar": {"x": randint(0, nx, 100), "y": 1, "t": randint(0, nt, 400)}, +} + +outer_assignment_values = { + "1d": xr.DataArray(randn((400, ny), frac_nan=0.1), dims=["x", "y"]), + "2d": xr.DataArray(randn((500, 400), frac_nan=0.1), dims=["x", "y"]), + "2d-1scalar": xr.DataArray(randn(100, frac_nan=0.1), dims=["x"]), +} + + +def make_vectorized_indexes(n_index): + return { + "1-1d": {"x": xr.DataArray(randint(0, nx, n_index), dims="a")}, + "2-1d": { + "x": xr.DataArray(randint(0, nx, n_index), dims="a"), + "y": xr.DataArray(randint(0, ny, n_index), dims="a"), + }, + "3-2d": { + "x": xr.DataArray( + randint(0, nx, n_index).reshape(n_index // 100, 100), dims=["a", "b"] + ), + "y": xr.DataArray( + randint(0, ny, n_index).reshape(n_index // 100, 100), 
dims=["a", "b"] + ), + "t": xr.DataArray( + randint(0, nt, n_index).reshape(n_index // 100, 100), dims=["a", "b"] + ), + }, + } + + +vectorized_indexes = make_vectorized_indexes(400) +big_vectorized_indexes = make_vectorized_indexes(400_000) + +vectorized_assignment_values = { + "1-1d": xr.DataArray(randn((400, ny)), dims=["a", "y"], coords={"a": randn(400)}), + "2-1d": xr.DataArray(randn(400), dims=["a"], coords={"a": randn(400)}), + "3-2d": xr.DataArray( + randn((4, 100)), dims=["a", "b"], coords={"a": randn(4), "b": randn(100)} + ), +} + + +class Base: + def setup(self, key): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn((nx, ny), frac_nan=0.1)), + "var2": (("x", "t"), randn((nx, nt))), + "var3": (("t",), randn(nt)), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + # Benchmark how indexing is slowed down by adding many scalar variable + # to the dataset + # https://github.com/pydata/xarray/pull/9003 + self.ds_large = self.ds.merge({f"extra_var{i}": i for i in range(400)}) + + +class Indexing(Base): + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic(self, key): + self.ds.isel(**basic_indexes[key]).load() + + @parameterized(["key"], [list(outer_indexes.keys())]) + def time_indexing_outer(self, key): + self.ds.isel(**outer_indexes[key]).load() + + @parameterized(["key"], [list(vectorized_indexes.keys())]) + def time_indexing_vectorized(self, key): + self.ds.isel(**vectorized_indexes[key]).load() + + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic_ds_large(self, key): + # https://github.com/pydata/xarray/pull/9003 + self.ds_large.isel(**basic_indexes[key]).load() + + +class IndexingOnly(Base): + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic(self, key): + self.ds.isel(**basic_indexes[key]) + + @parameterized(["key"], 
[list(outer_indexes.keys())]) + def time_indexing_outer(self, key): + self.ds.isel(**outer_indexes[key]) + + @parameterized(["key"], [list(big_vectorized_indexes.keys())]) + def time_indexing_big_vectorized(self, key): + self.ds.isel(**big_vectorized_indexes[key]) + + +class Assignment(Base): + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_assignment_basic(self, key): + ind = basic_indexes[key] + val = basic_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + @parameterized(["key"], [list(outer_indexes.keys())]) + def time_assignment_outer(self, key): + ind = outer_indexes[key] + val = outer_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + @parameterized(["key"], [list(vectorized_indexes.keys())]) + def time_assignment_vectorized(self, key): + ind = vectorized_indexes[key] + val = vectorized_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + +class IndexingDask(Indexing): + def setup(self, key): + requires_dask() + super().setup(key) + self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + + +class BooleanIndexing: + # https://github.com/pydata/xarray/issues/2227 + def setup(self): + self.ds = xr.Dataset( + {"a": ("time", np.arange(10_000_000))}, + coords={"time": np.arange(10_000_000)}, + ) + self.time_filter = self.ds.time > 50_000 + + def time_indexing(self): + self.ds.isel(time=self.time_filter) + + +class HugeAxisSmallSliceIndexing: + # https://github.com/pydata/xarray/pull/4560 + def setup(self): + self.filepath = "test_indexing_huge_axis_small_slice.nc" + if not os.path.isfile(self.filepath): + xr.Dataset( + {"a": ("x", np.arange(10_000_000))}, + coords={"x": np.arange(10_000_000)}, + ).to_netcdf(self.filepath, format="NETCDF4") + + self.ds = xr.open_dataset(self.filepath) + + def time_indexing(self): + self.ds.isel(x=slice(100)) + + def cleanup(self): + self.ds.close() + + 
+class AssignmentOptimized: + # https://github.com/pydata/xarray/pull/7382 + def setup(self): + self.ds = xr.Dataset(coords={"x": np.arange(500_000)}) + self.da = xr.DataArray(np.arange(500_000), dims="x") + + def time_assign_no_reindex(self): + # assign with non-indexed DataArray of same dimension size + self.ds.assign(foo=self.da) + + def time_assign_identical_indexes(self): + # fastpath index comparison (same index object) + self.ds.assign(foo=self.ds.x) diff --git a/benchmarks/benchmarks/interp.py b/benchmarks/benchmarks/interp.py new file mode 100644 index 00000000000..ca1d0a2dd89 --- /dev/null +++ b/benchmarks/benchmarks/interp.py @@ -0,0 +1,65 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . import parameterized, randn, requires_dask + +nx = 1500 +ny = 1000 +nt = 500 + +randn_xy = randn((nx, ny), frac_nan=0.1) +randn_xt = randn((nx, nt)) +randn_t = randn((nt,)) + +new_x_short = np.linspace(0.3 * nx, 0.7 * nx, 100) +new_x_long = np.linspace(0.3 * nx, 0.7 * nx, 500) +new_y_long = np.linspace(0.1, 0.9, 500) + + +class Interpolation: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + "var4": (("z",), np.array(["text"])), + "var5": (("k",), np.array(["a", "b", "c"])), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + "z": np.array([1]), + "k": np.linspace(0, nx, 3), + }, + ) + + @parameterized(["method", "is_short"], (["linear", "cubic"], [True, False])) + def time_interpolation_numeric_1d(self, method, is_short): + new_x = new_x_short if is_short else new_x_long + self.ds.interp(x=new_x, method=method).compute() + + @parameterized(["method"], (["linear", "nearest"])) + def time_interpolation_numeric_2d(self, method): + self.ds.interp(x=new_x_long, y=new_y_long, method=method).compute() + + 
@parameterized(["is_short"], ([True, False])) + def time_interpolation_string_scalar(self, is_short): + new_z = new_x_short if is_short else new_x_long + self.ds.interp(z=new_z).compute() + + @parameterized(["is_short"], ([True, False])) + def time_interpolation_string_1d(self, is_short): + new_k = new_x_short if is_short else new_x_long + self.ds.interp(k=new_k).compute() + + +class InterpolationDask(Interpolation): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.ds = self.ds.chunk({"t": 50}) diff --git a/benchmarks/benchmarks/merge.py b/benchmarks/benchmarks/merge.py new file mode 100644 index 00000000000..6c8c1e9da90 --- /dev/null +++ b/benchmarks/benchmarks/merge.py @@ -0,0 +1,77 @@ +import numpy as np + +import xarray as xr + + +class DatasetAddVariable: + param_names = ["existing_elements"] + params = [[0, 10, 100, 1000]] + + def setup(self, existing_elements): + self.datasets = {} + # Dictionary insertion is fast(er) than xarray.Dataset insertion + d = {} + for i in range(existing_elements): + d[f"var{i}"] = i + self.dataset = xr.merge([d]) + + d = {f"set_2_{i}": i for i in range(existing_elements)} + self.dataset2 = xr.merge([d]) + + def time_variable_insertion(self, existing_elements): + dataset = self.dataset + dataset["new_var"] = 0 + + def time_merge_two_datasets(self, existing_elements): + xr.merge([self.dataset, self.dataset2]) + + +class DatasetCreation: + # The idea here is to time how long it takes to go from numpy + # and python data types, to a full dataset + # See discussion + # https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344 + param_names = ["strategy", "count"] + params = [ + ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"], + [0, 1, 10, 100, 1000], + ] + + def setup(self, strategy, count): + data = np.array(["0", "b"], dtype=str) + self.dataset_coords = dict(time=np.array([0, 1])) + self.dataset_attrs = dict(description="Test data") + attrs = dict(units="Celsius") + if 
strategy == "dict_of_DataArrays": + + def create_data_vars(): + return { + f"long_variable_name_{i}": xr.DataArray( + data=data, dims=("time"), attrs=attrs + ) + for i in range(count) + } + + elif strategy == "dict_of_Variables": + + def create_data_vars(): + return { + f"long_variable_name_{i}": xr.Variable("time", data, attrs=attrs) + for i in range(count) + } + + elif strategy == "dict_of_Tuples": + + def create_data_vars(): + return { + f"long_variable_name_{i}": ("time", data, attrs) + for i in range(count) + } + + self.create_data_vars = create_data_vars + + def time_dataset_creation(self, strategy, count): + data_vars = self.create_data_vars() + xr.Dataset( + data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs + ) diff --git a/benchmarks/benchmarks/pandas.py b/benchmarks/benchmarks/pandas.py new file mode 100644 index 00000000000..ebe61081916 --- /dev/null +++ b/benchmarks/benchmarks/pandas.py @@ -0,0 +1,64 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import parameterized, requires_dask + + +class MultiIndexSeries: + def setup(self, dtype, subset): + data = np.random.rand(100000).astype(dtype) + index = pd.MultiIndex.from_product( + [ + list("abcdefhijk"), + list("abcdefhijk"), + pd.date_range(start="2000-01-01", periods=1000, freq="D"), + ] + ) + series = pd.Series(data, index) + if subset: + series = series[::3] + self.series = series + + @parameterized(["dtype", "subset"], ([int, float], [True, False])) + def time_from_series(self, dtype, subset): + xr.DataArray.from_series(self.series) + + +class ToDataFrame: + def setup(self, *args, **kwargs): + xp = kwargs.get("xp", np) + nvars = kwargs.get("nvars", 1) + random_kws = kwargs.get("random_kws", {}) + method = kwargs.get("method", "to_dataframe") + + dim1 = 10_000 + dim2 = 10_000 + + var = xr.Variable( + dims=("dim1", "dim2"), data=xp.random.random((dim1, dim2), **random_kws) + ) + data_vars = {f"long_name_{v}": (("dim1", "dim2"), var) for v in range(nvars)} + + ds = xr.Dataset( + data_vars, coords={"dim1": np.arange(0, dim1), "dim2": np.arange(0, dim2)} + ) + self.to_frame = getattr(ds, method) + + def time_to_dataframe(self): + self.to_frame() + + def peakmem_to_dataframe(self): + self.to_frame() + + +class ToDataFrameDask(ToDataFrame): + def setup(self, *args, **kwargs): + requires_dask() + + import dask.array as da + + super().setup( + xp=da, random_kws=dict(chunks=5000), method="to_dask_dataframe", nvars=500 + ) diff --git a/benchmarks/benchmarks/polyfit.py b/benchmarks/benchmarks/polyfit.py new file mode 100644 index 00000000000..429ffa19baa --- /dev/null +++ b/benchmarks/benchmarks/polyfit.py @@ -0,0 +1,38 @@ +import numpy as np + +import xarray as xr + +from . 
import parameterized, randn, requires_dask + +NDEGS = (2, 5, 20) +NX = (10**2, 10**6) + + +class Polyval: + def setup(self, *args, **kwargs): + self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} + self.coeffs = { + ndeg: xr.DataArray( + randn((ndeg,)), dims="degree", coords={"degree": np.arange(ndeg)} + ) + for ndeg in NDEGS + } + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def time_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def peakmem_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + +class PolyvalDask(Polyval): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(*args, **kwargs) + self.xs = {k: v.chunk({"x": 10000}) for k, v in self.xs.items()} diff --git a/benchmarks/benchmarks/reindexing.py b/benchmarks/benchmarks/reindexing.py new file mode 100644 index 00000000000..61e6b2213f3 --- /dev/null +++ b/benchmarks/benchmarks/reindexing.py @@ -0,0 +1,52 @@ +import numpy as np + +import xarray as xr + +from . 
import requires_dask + +ntime = 500 +nx = 50 +ny = 50 + + +class Reindex: + def setup(self): + data = np.random.default_rng(0).random((ntime, nx, ny)) + self.ds = xr.Dataset( + {"temperature": (("time", "x", "y"), data)}, + coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)}, + ) + + def time_1d_coarse(self): + self.ds.reindex(time=np.arange(0, ntime, 5)).load() + + def time_1d_fine_all_found(self): + self.ds.reindex(time=np.arange(0, ntime, 0.5), method="nearest").load() + + def time_1d_fine_some_missing(self): + self.ds.reindex( + time=np.arange(0, ntime, 0.5), method="nearest", tolerance=0.1 + ).load() + + def time_2d_coarse(self): + self.ds.reindex(x=np.arange(0, nx, 2), y=np.arange(0, ny, 2)).load() + + def time_2d_fine_all_found(self): + self.ds.reindex( + x=np.arange(0, nx, 0.5), y=np.arange(0, ny, 0.5), method="nearest" + ).load() + + def time_2d_fine_some_missing(self): + self.ds.reindex( + x=np.arange(0, nx, 0.5), + y=np.arange(0, ny, 0.5), + method="nearest", + tolerance=0.1, + ).load() + + +class ReindexDask(Reindex): + def setup(self): + requires_dask() + super().setup() + self.ds = self.ds.chunk({"time": 100}) diff --git a/benchmarks/benchmarks/renaming.py b/benchmarks/benchmarks/renaming.py new file mode 100644 index 00000000000..3ade5d8df70 --- /dev/null +++ b/benchmarks/benchmarks/renaming.py @@ -0,0 +1,27 @@ +import numpy as np + +import xarray as xr + + +class SwapDims: + param_names = ["size"] + params = [[int(1e3), int(1e5), int(1e7)]] + + def setup(self, size: int) -> None: + self.ds = xr.Dataset( + {"a": (("x", "t"), np.ones((size, 2)))}, + coords={ + "x": np.arange(size), + "y": np.arange(size), + "z": np.arange(size), + "x2": ("x", np.arange(size)), + "y2": ("y", np.arange(size)), + "z2": ("z", np.arange(size)), + }, + ) + + def time_swap_dims(self, size: int) -> None: + self.ds.swap_dims({"x": "xn", "y": "yn", "z": "zn"}) + + def time_swap_dims_newindex(self, size: int) -> None: + self.ds.swap_dims({"x": "x2", "y": 
"y2", "z": "z2"}) diff --git a/benchmarks/benchmarks/repr.py b/benchmarks/benchmarks/repr.py new file mode 100644 index 00000000000..68a082fcc4f --- /dev/null +++ b/benchmarks/benchmarks/repr.py @@ -0,0 +1,87 @@ +import numpy as np +import pandas as pd + +import xarray as xr + + +class Repr: + def setup(self): + a = np.arange(0, 100) + data_vars = dict() + for i in a: + data_vars[f"long_variable_name_{i}"] = xr.DataArray( + name=f"long_variable_name_{i}", + data=np.arange(0, 20), + dims=[f"long_coord_name_{i}_x"], + coords={f"long_coord_name_{i}_x": np.arange(0, 20) * 2}, + ) + self.ds = xr.Dataset(data_vars) + self.ds.attrs = {f"attr_{k}": 2 for k in a} + + def time_repr(self): + repr(self.ds) + + def time_repr_html(self): + self.ds._repr_html_() + + +class ReprDataTree: + def setup(self): + # construct a datatree with 500 nodes + number_of_files = 20 + number_of_groups = 25 + tree_dict = {} + for f in range(number_of_files): + for g in range(number_of_groups): + tree_dict[f"file_{f}/group_{g}"] = xr.Dataset({"g": f * g}) + + self.dt = xr.DataTree.from_dict(tree_dict) + + def time_repr(self): + repr(self.dt) + + def time_repr_html(self): + self.dt._repr_html_() + + +class ReprMultiIndex: + def setup(self): + index = pd.MultiIndex.from_product( + [range(1000), range(1000)], names=("level_0", "level_1") + ) + series = pd.Series(range(1000 * 1000), index=index) + self.da = xr.DataArray(series) + + def time_repr(self): + repr(self.da) + + def time_repr_html(self): + self.da._repr_html_() + + +class ReprPandasRangeIndex: + # display a memory-saving pandas.RangeIndex shouldn't trigger memory + # expensive conversion into a numpy array + def setup(self): + index = xr.indexes.PandasIndex(pd.RangeIndex(1_000_000), "x") + self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) + + def time_repr(self): + repr(self.ds.x) + + def time_repr_html(self): + self.ds.x._repr_html_() + + +class ReprXarrayRangeIndex: + # display an Xarray RangeIndex shouldn't trigger memory 
expensive conversion + # of its lazy coordinate into a numpy array + def setup(self): + index = xr.indexes.RangeIndex.arange(1_000_000, dim="x") + self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) + + def time_repr(self): + repr(self.ds.x) + + def time_repr_html(self): + self.ds.x._repr_html_() diff --git a/benchmarks/benchmarks/rolling.py b/benchmarks/benchmarks/rolling.py new file mode 100644 index 00000000000..4fa2e09c9c0 --- /dev/null +++ b/benchmarks/benchmarks/rolling.py @@ -0,0 +1,142 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . import _skip_slow, parameterized, randn, requires_dask + +nx = 3000 +long_nx = 30000 +ny = 200 +nt = 1000 +window = 20 + +randn_xy = randn((nx, ny), frac_nan=0.1) +randn_xt = randn((nx, nt)) +randn_t = randn((nt,)) +randn_long = randn((long_nx,), frac_nan=0.1) + + +class Rolling: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + self.da_long = xr.DataArray( + randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1} + ) + + @parameterized( + ["func", "center", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling(self, func, center, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + getattr(self.ds.rolling(x=window, center=center), func)().load() + + @parameterized( + ["func", "pandas", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling_long(self, func, pandas, use_bottleneck): + if pandas: + se = self.da_long.to_series() + getattr(se.rolling(window=window, min_periods=window), func)() + else: + with xr.set_options(use_bottleneck=use_bottleneck): + getattr( + self.da_long.rolling(x=window, 
min_periods=window), func + )().load() + + @parameterized( + ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False]) + ) + def time_rolling_np(self, window_, min_periods, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( + np.nansum + ).load() + + @parameterized( + ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False]) + ) + def time_rolling_construct(self, center, stride, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window, center=center).construct( + "window_dim", stride=stride + ).sum(dim="window_dim").load() + + +class RollingDask(Rolling): + def setup(self, *args, **kwargs): + requires_dask() + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + super().setup(**kwargs) + self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + self.da_long = self.da_long.chunk({"x": 10000}) + + +class RollingMemory: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + + +class DataArrayRollingMemory(RollingMemory): + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var1.rolling(x=10, y=4) + getattr(roll, func)() + + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var3.rolling(t=100) + getattr(roll, func)() + + 
@parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.var2.rolling(t=100).construct("w", stride=stride) + self.ds.var3.rolling(t=100).construct("w", stride=stride) + + +class DatasetRollingMemory(RollingMemory): + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(x=10, y=4) + getattr(roll, func)() + + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(t=100) + getattr(roll, func)() + + @parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.rolling(t=100).construct("w", stride=stride) diff --git a/benchmarks/benchmarks/unstacking.py b/benchmarks/benchmarks/unstacking.py new file mode 100644 index 00000000000..b3af5eac19c --- /dev/null +++ b/benchmarks/benchmarks/unstacking.py @@ -0,0 +1,64 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import requires_dask, requires_sparse + + +class Unstacking: + def setup(self): + data = np.random.default_rng(0).random((250, 500)) + self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) + self.da_missing = self.da_full[:-1] + self.df_missing = self.da_missing.to_pandas() + + def time_unstack_fast(self): + self.da_full.unstack("flat_dim") + + def time_unstack_slow(self): + self.da_missing.unstack("flat_dim") + + def time_unstack_pandas_slow(self): + self.df_missing.unstack() + + +class UnstackingDask(Unstacking): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.da_full = self.da_full.chunk({"flat_dim": 25}) + + +class UnstackingSparse(Unstacking): + def setup(self, *args, **kwargs): + requires_sparse() + + import sparse + + data = sparse.random((500, 1000), random_state=0, fill_value=0) + self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) + self.da_missing = self.da_full[:-1] + + mindex = pd.MultiIndex.from_arrays([np.arange(100), np.arange(100)]) + self.da_eye_2d = xr.DataArray(np.ones((100,)), dims="z", coords={"z": mindex}) + self.da_eye_3d = xr.DataArray( + np.ones((100, 50)), + dims=("z", "foo"), + coords={"z": mindex, "foo": np.arange(50)}, + ) + + def time_unstack_to_sparse_2d(self): + self.da_eye_2d.unstack(sparse=True) + + def time_unstack_to_sparse_3d(self): + self.da_eye_3d.unstack(sparse=True) + + def peakmem_unstack_to_sparse_2d(self): + self.da_eye_2d.unstack(sparse=True) + + def peakmem_unstack_to_sparse_3d(self): + self.da_eye_3d.unstack(sparse=True) + + def time_unstack_pandas_slow(self): + pass diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py new file mode 100644 index 00000000000..c0da5b5fb47 --- /dev/null +++ b/benchmarks/bm_runner.py @@ -0,0 +1,739 @@ +#!/usr/bin/env python3 +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Argparse conveniences for executing common types of benchmark runs.""" + +from abc import ABC, abstractmethod +import argparse +from datetime import datetime +from importlib import import_module +from os import environ +from pathlib import Path +import re +import shlex +import subprocess +from tempfile import NamedTemporaryFile +from textwrap import dedent +from typing import Literal, Protocol + +# The threshold beyond which shifts are 'notable'. See `asv compare`` docs +# for more. +COMPARE_FACTOR = 1.2 + +BENCHMARKS_DIR = Path(__file__).parent +ROOT_DIR = BENCHMARKS_DIR.parent +# Storage location for reports used in GitHub actions. +GH_REPORT_DIR = ROOT_DIR.joinpath(".github", "workflows", "benchmark_reports") + +# Common ASV arguments for all run_types except `custom`. +ASV_HARNESS = "run {posargs} --attribute rounds=3 --interleave-rounds --show-stderr" + + +def echo(echo_string: str): + # Use subprocess for printing to reduce chance of printing out of sequence + # with the subsequent calls. + subprocess.run(["echo", f"BM_RUNNER DEBUG: {echo_string}"]) + + +def _subprocess_runner(args, asv=False, **kwargs): + # Avoid permanent modifications if the same arguments are used more than once. + args = args.copy() + kwargs = kwargs.copy() + if asv: + args.insert(0, "asv") + kwargs["cwd"] = BENCHMARKS_DIR + echo(" ".join(args)) + kwargs.setdefault("check", True) + return subprocess.run(args, **kwargs) + + +def _subprocess_runner_capture(args, **kwargs) -> str: + result = _subprocess_runner(args, capture_output=True, **kwargs) + return result.stdout.decode().rstrip() + + +def _check_requirements(package: str) -> None: + try: + import_module(package) + except ImportError as exc: + message = ( + f"No {package} install detected. Benchmarks can only " + f"be run in an environment including {package}." 
+ ) + raise Exception(message) from exc + + +def _prep_data_gen_env() -> None: + """Create or access a separate, unchanging environment for generating test data.""" + python_version = "3.13" + data_gen_var = "DATA_GEN_PYTHON" + if data_gen_var in environ: + echo("Using existing data generation environment.") + else: + echo("Setting up the data generation environment ...") + # Get Nox to build an environment for the `tests` session, but don't + # run the session. Will reuse a cached environment if appropriate. + _subprocess_runner( + [ + "nox", + f"--noxfile={ROOT_DIR / 'noxfile.py'}", + "--session=tests", + "--install-only", + f"--python={python_version}", + ] + ) + # Find the environment built above, set it to be the data generation + # environment. + env_directory: Path = next((ROOT_DIR / ".nox").rglob(f"tests*")) + data_gen_python = (env_directory / "bin" / "python").resolve() + environ[data_gen_var] = str(data_gen_python) + + def clone_resource(name: str, clone_source: str) -> Path: + resource_dir = data_gen_python.parents[1] / "resources" + resource_dir.mkdir(exist_ok=True) + clone_dir = resource_dir / name + if not clone_dir.is_dir(): + _subprocess_runner(["git", "clone", clone_source, str(clone_dir)]) + return clone_dir + + echo("Installing Mule into data generation environment ...") + mule_dir = clone_resource("mule", "https://github.com/metomi/mule.git") + _subprocess_runner( + [ + str(data_gen_python), + "-m", + "pip", + "install", + str(mule_dir / "mule"), + ] + ) + + test_data_var = "OVERRIDE_TEST_DATA_REPOSITORY" + if test_data_var not in environ: + echo("Installing iris-test-data into data generation environment ...") + test_data_dir = clone_resource( + "iris-test-data", "https://github.com/SciTools/iris-test-data.git" + ) + environ[test_data_var] = str(test_data_dir / "test_data") + + echo("Data generation environment ready.") + + +def _setup_common() -> None: + _check_requirements("asv") + _check_requirements("nox") + + _prep_data_gen_env() + + 
echo("Setting up ASV ...") + _subprocess_runner(["machine", "--yes"], asv=True) + + echo("Setup complete.") + + +def _asv_compare( + *commits: str, + overnight_mode: bool = False, + fail_on_regression: bool = False, +) -> None: + """Run through a list of commits comparing each one to the next.""" + commits = tuple(commit[:8] for commit in commits) + + machine_script = [ + "from asv.machine import Machine", + "print(Machine.get_unique_machine_name())", + ] + machine_name = _subprocess_runner_capture( + ["python", "-c", ";".join(machine_script)] + ) + + for i in range(len(commits) - 1): + before = commits[i] + after = commits[i + 1] + asv_command = shlex.split( + f"compare {before} {after} " + f"--machine {machine_name} --factor={COMPARE_FACTOR} --split" + ) + + comparison = _subprocess_runner_capture(asv_command, asv=True) + echo(comparison) + shifts = _subprocess_runner_capture([*asv_command, "--only-changed"], asv=True) + + if shifts or (not overnight_mode): + # For the overnight run: only post if there are shifts. + _gh_create_reports(after, comparison, shifts) + + if shifts and fail_on_regression: + # fail_on_regression supports setups that expect CI failures. + message = ( + f"Performance shifts detected between commits {before} and {after}.\n" + ) + raise RuntimeError(message) + + +def _gh_create_reports(commit_sha: str, results_full: str, results_shifts: str) -> None: + """If running under GitHub Actions: record the results in report(s). + + Posting the reports is done by :func:`_gh_post_reports`, which must be run + within a separate action to comply with GHA's security limitations. + """ + if "GITHUB_ACTIONS" not in environ: + # Only run when within GHA. 
+ return + + pr_number = environ.get("PR_NUMBER", None) + on_pull_request = pr_number is not None + run_id = environ["GITHUB_RUN_ID"] + repo = environ["GITHUB_REPOSITORY"] + gha_run_link = f"[`{run_id}`](https://github.com/{repo}/actions/runs/{run_id})" + + GH_REPORT_DIR.mkdir(exist_ok=True) + commit_dir = GH_REPORT_DIR / commit_sha + commit_dir.mkdir() + command_path = commit_dir / "command.txt" + body_path = commit_dir / "body.txt" + + performance_report = dedent( + ( + """ + # :stopwatch: Performance Benchmark Report: {commit_sha} + +
+ Performance shifts + + ``` + {results_shifts} + ``` + +
+ +
+ Full benchmark results + + ``` + {results_full} + ``` + +
+ + Generated by GHA run {gha_run_link} + """ + ) + ) + performance_report = performance_report.format( + commit_sha=commit_sha, + results_shifts=results_shifts, + results_full=results_full, + gha_run_link=gha_run_link, + ) + + if on_pull_request: + # Command to post the report as a comment on the active PR. + body_path.write_text(performance_report) + command = ( + f"gh pr comment {pr_number} " + f"--body-file {body_path.absolute()} " + f"--repo {repo}" + ) + command_path.write_text(command) + + else: + # Command to post the report as new issue. + commit_msg = _subprocess_runner_capture( + f"git log {commit_sha}^! --oneline".split(" ") + ) + # Intended for benchmarking commits on trunk - should include a PR + # number due to our squash policy. + pr_tag_match = re.search("#[0-9]*", commit_msg) + + assignee = "" + pr_tag = "pull request number unavailable" + if pr_tag_match is not None: + pr_tag = pr_tag_match.group(0) + + for login_type in ("author", "mergedBy"): + gh_query = f'.["{login_type}"]["login"]' + commandlist = shlex.split( + f"gh pr view {pr_tag[1:]} " + f"--json {login_type} -q '{gh_query}' " + f"--repo {repo}" + ) + login = _subprocess_runner_capture(commandlist) + + commandlist = [ + "curl", + "-s", + f"https://api.github.com/users/{login}", + ] + login_info = _subprocess_runner_capture(commandlist) + is_user = '"type": "User"' in login_info + if is_user: + assignee = login + break + + title = f"Performance Shift(s): `{commit_sha}`" + body = dedent( + ( + f""" + Benchmark comparison has identified performance shifts at: + + * commit {commit_sha} ({pr_tag}). + +

+ Please review the report below and + take corrective/congratulatory action as appropriate + :slightly_smiling_face: +

+ """ + ) + ) + body += performance_report + body_path.write_text(body) + + command = ( + "gh issue create " + f'--title "{title}" ' + f"--body-file {body_path.absolute()} " + '--label "Bot" ' + '--label "Type: Performance" ' + f"--repo {repo}" + ) + if assignee: + command += f" --assignee {assignee}" + command_path.write_text(command) + + +def _gh_post_reports() -> None: + """If running under GitHub Actions: post pre-prepared benchmark reports. + + Reports are prepared by :func:`_gh_create_reports`, which must be run + within a separate action to comply with GHA's security limitations. + """ + if "GITHUB_ACTIONS" not in environ: + # Only run when within GHA. + return + + commit_dirs = [x for x in GH_REPORT_DIR.iterdir() if x.is_dir()] + for commit_dir in commit_dirs: + command_path = commit_dir / "command.txt" + command = command_path.read_text() + + # Security: only accept certain commands to run. + assert command.startswith(("gh issue create", "gh pr comment")) + + _subprocess_runner(shlex.split(command)) + + +class _SubParserGenerator(ABC): + """Convenience for holding all the necessary argparse info in 1 place.""" + + name: str = NotImplemented + description: str = NotImplemented + epilog: str = NotImplemented + + class _SubParsersType(Protocol): + """Duck typing since argparse._SubParsersAction is private.""" + + def add_parser(self, name, **kwargs) -> argparse.ArgumentParser: ... 
+ + def __init__(self, subparsers: _SubParsersType) -> None: + self.subparser = subparsers.add_parser( + self.name, + description=self.description, + epilog=self.epilog, + formatter_class=argparse.RawTextHelpFormatter, + ) + self.add_arguments() + self.add_asv_arguments() + self.subparser.set_defaults(func=self.func) + + @abstractmethod + def add_arguments(self) -> None: + """All custom self.subparser.add_argument() calls.""" + _ = NotImplemented + + def add_asv_arguments(self) -> None: + self.subparser.add_argument( + "asv_args", + nargs=argparse.REMAINDER, + help="Any number of arguments to pass down to the ASV benchmark command.", + ) + + @staticmethod + @abstractmethod + def func(args: argparse.Namespace): + """Return when the subparser is parsed. + + `func` is then called, performing the user's selected sub-command. + + """ + _ = args + return NotImplemented + + +class Overnight(_SubParserGenerator): + name = "overnight" + description = ( + "Benchmarks all commits between the input **first_commit** to ``HEAD``, " + "comparing each to its parent for performance shifts. If running on " + "GitHub Actions: performance shift(s) will be reported in a new issue.\n" + "Designed for checking the previous 24 hours' commits, typically in a " + "scheduled script.\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py overnight a1b23d4\n" + "e.g. python bm_runner.py overnight a1b23d4 --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "first_commit", + type=str, + help="The first commit in the benchmarking commit sequence.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + commit_range = f"{args.first_commit}^^.." + # git rev-list --first-parent is the command ASV uses. 
+ git_command = shlex.split(f"git rev-list --first-parent {commit_range}") + commit_string = _subprocess_runner_capture(git_command) + commit_list = commit_string.split("\n") + + asv_command = shlex.split(ASV_HARNESS.format(posargs=commit_range)) + try: + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + finally: + # Designed for long running - want to compare/post any valid + # results even if some are broken. + _asv_compare(*reversed(commit_list), overnight_mode=True) + + +class Branch(_SubParserGenerator): + name = "branch" + description = ( + "Performs the same operations as ``overnight``, but always on two " + "commits only - ``HEAD``, and ``HEAD``'s merge-base with the input " + "**base_branch**.\n" + "If running on GitHub Actions: HEAD will be GitHub's " + "merge commit and merge-base will be the merge target. Performance " + "comparisons will be posted in a comment on the relevant pull request.\n" + "Designed for testing if the active branch's changes cause performance " + "shifts - anticipating what would be caught by ``overnight`` once " + "merged.\n\n" + "**For maximum accuracy, avoid using the machine that is running this " + "session. Run time could be >1 hour for the full benchmark suite.**\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py branch upstream/main\n" + "e.g. 
python bm_runner.py branch upstream/main --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "base_branch", + type=str, + help="A branch that has the merge-base with ``HEAD`` - ``HEAD`` will be benchmarked against that merge-base.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + git_command = shlex.split("git rev-parse HEAD") + head_sha = _subprocess_runner_capture(git_command)[:8] + + git_command = shlex.split(f"git merge-base {head_sha} {args.base_branch}") + merge_base = _subprocess_runner_capture(git_command)[:8] + + with NamedTemporaryFile("w") as hashfile: + hashfile.writelines([merge_base, "\n", head_sha]) + hashfile.flush() + commit_range = f"HASHFILE:{hashfile.name}" + asv_command = shlex.split(ASV_HARNESS.format(posargs=commit_range)) + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + + _asv_compare(merge_base, head_sha) + + +class _CSPerf(_SubParserGenerator, ABC): + """Common code used by both CPerf and SPerf.""" + + description = ( + "Run the on-demand {} suite of benchmarks (part of the UK Met " + "Office NG-VAT project) for the ``HEAD`` of ``upstream/main`` only, " + "and publish the results to the input **publish_dir**, within a " + "unique subdirectory for this run.\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py {0} my_publish_dir\n" + "e.g. 
python bm_runner.py {0} my_publish_dir --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "publish_dir", + type=str, + help="HTML results will be published to a sub-dir in this dir.", + ) + + @staticmethod + def csperf(args: argparse.Namespace, run_type: Literal["cperf", "sperf"]) -> None: + _setup_common() + + publish_dir = Path(args.publish_dir) + if not publish_dir.is_dir(): + message = f"Input 'publish directory' is not a directory: {publish_dir}" + raise NotADirectoryError(message) + publish_subdir = ( + publish_dir / f"{run_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + ) + publish_subdir.mkdir() + + # Activate on demand benchmarks (C/SPerf are deactivated for + # 'standard' runs). + environ["ON_DEMAND_BENCHMARKS"] = "True" + commit_range = "upstream/main^!" + + asv_command_str = ( + ASV_HARNESS.format(posargs=commit_range) + f" --bench={run_type}" + ) + + # Only do a single round. + asv_command = shlex.split(re.sub(r"rounds=\d", "rounds=1", asv_command_str)) + try: + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + except subprocess.CalledProcessError as err: + # C/SPerf benchmarks are much bigger than the CI ones: + # Don't fail the whole run if memory blows on 1 benchmark. + # ASV produces return code of 2 if the run includes crashes. + if err.returncode != 2: + raise + + asv_command = shlex.split(f"publish {commit_range} --html-dir={publish_subdir}") + _subprocess_runner(asv_command, asv=True) + + # Print completion message. + location = BENCHMARKS_DIR / ".asv" + echo( + f'New ASV results for "{run_type}".\n' + f'See "{publish_subdir}",' + f'\n or JSON files under "{location / "results"}".' 
+ ) + + +class CPerf(_CSPerf): + name = "cperf" + description = _CSPerf.description.format("CPerf") + epilog = _CSPerf.epilog.format("cperf") + + @staticmethod + def func(args: argparse.Namespace) -> None: + _CSPerf.csperf(args, "cperf") + + +class SPerf(_CSPerf): + name = "sperf" + description = _CSPerf.description.format("SPerf") + epilog = _CSPerf.epilog.format("sperf") + + @staticmethod + def func(args: argparse.Namespace) -> None: + _CSPerf.csperf(args, "sperf") + + +class Custom(_SubParserGenerator): + name = "custom" + description = ( + "Run ASV with the input **ASV sub-command**, without any preset " + "arguments - must all be supplied by the user. So just like running " + "ASV manually, with the convenience of re-using the runner's " + "scripted setup steps." + ) + epilog = "e.g. python bm_runner.py custom continuous a1b23d4 HEAD --quick" + + def add_arguments(self) -> None: + self.subparser.add_argument( + "asv_sub_command", + type=str, + help="The ASV command to run.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + _subprocess_runner([args.asv_sub_command, *args.asv_args], asv=True) + + +class TrialRun(_SubParserGenerator): + name = "trialrun" + description = ( + "Fast trial-run a given benchmark, to check it works : " + "in a provided or latest-lockfile environment, " + "with no repeats for accuracy of measurement." + ) + epilog = ( + "e.g. python bm_runner.py trialrun " + "MyBenchmarks.time_calc ${DATA_GEN_PYTHON}" + "\n\nNOTE: 'runpath' also replaces $DATA_GEN_PYTHON during the run." + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "benchmark", + type=str, + help=( + "A benchmark name, possibly including wildcards, " + "as supported by the ASV '--bench' argument." + ), + ) + self.subparser.add_argument( + "runpath", + type=str, + help=( + "A path to an existing python executable, " + "to completely bypass environment building." 
+ ), + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + if args.runpath: + # Shortcut creation of a data-gen environment + # - which is also the trial-run env. + python_path = Path(args.runpath).resolve() + environ["DATA_GEN_PYTHON"] = str(python_path) + _setup_common() + # get path of data-gen environment, setup by previous call + python_path = Path(environ["DATA_GEN_PYTHON"]) + # allow 'on-demand' benchmarks + environ["ON_DEMAND_BENCHMARKS"] = "1" + asv_command = [ + "run", + "--bench", + args.benchmark, + # no repeats for timing accuracy + "--quick", + "--show-stderr", + # do not build a unique env : run test in data-gen environment + "--environment", + f"existing:{python_path}", + ] + args.asv_args + _subprocess_runner(asv_command, asv=True) + + +class Validate(_SubParserGenerator): + name = "validate" + description = ( + "Quickly check that the benchmark architecture works as intended with " + "the current codebase. Things that are checked: env creation/update, " + "package build/install/uninstall, artificial data creation." + ) + epilog = "Sole acceptable syntax: python bm_runner.py validate" + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + git_command = shlex.split("git rev-parse HEAD") + head_sha = _subprocess_runner_capture(git_command)[:8] + + # Find the most recent commit where the lock-files are not + # identical to HEAD - will force environment updates. 
+ locks_dir = Path(__file__).parents[1] / "ci" / "requirements" / "locks" + assert locks_dir.is_dir() + git_command = shlex.split( + f"git log -1 --pretty=format:%P -- {locks_dir.resolve()}" + ) + locks_sha = _subprocess_runner_capture(git_command)[:8] + + with NamedTemporaryFile("w") as hashfile: + hashfile.writelines([locks_sha, "\n", head_sha]) + hashfile.flush() + asv_command = shlex.split( + f"run HASHFILE:{hashfile.name} --bench ValidateSetup " + "--attribute rounds=1 --show-stderr" + ) + extra_env = environ | {"ON_DEMAND_BENCHMARKS": "1"} + _subprocess_runner(asv_command, asv=True, env=extra_env) + + # No arguments permitted for this subclass: + + def add_arguments(self) -> None: + pass + + def add_asv_arguments(self) -> None: + pass + + +class GhPost(_SubParserGenerator): + name = "_gh_post" + description = ( + "Used by GitHub Actions to post benchmark reports that were prepared " + "during previous actions. Separated to comply with GitHub's security " + "requirements." + ) + epilog = "Sole acceptable syntax: python bm_runner.py _gh_post" + + @staticmethod + def func(args: argparse.Namespace) -> None: + _gh_post_reports() + + # No arguments permitted for this subclass: + + def add_arguments(self) -> None: + pass + + def add_asv_arguments(self) -> None: + pass + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Run the repository performance benchmarks (using Airspeed Velocity)." + ), + epilog=( + "More help is available within each sub-command." + "\n\nNOTE(1): a separate python environment is created to " + "construct test files.\n Set $DATA_GEN_PYTHON to avoid the cost " + "of this." + "\nNOTE(2): iris-test-data is downloaded and cached within the " + "data generation environment.\n Set " + "$OVERRIDE_TEST_DATA_REPOSITORY to avoid the cost of this." 
+ "\nNOTE(3): test data is cached within the " + "benchmarks code directory, and uses a lot of disk space " + "of disk space (Gb).\n Set $BENCHMARK_DATA to specify where this " + "space can be safely allocated." + ), + formatter_class=argparse.RawTextHelpFormatter, + ) + subparsers = parser.add_subparsers(required=True) + + parser_generators: tuple[type[_SubParserGenerator], ...] = ( + Overnight, + Branch, + CPerf, + SPerf, + Custom, + TrialRun, + Validate, + GhPost, + ) + + for gen in parser_generators: + _ = gen(subparsers).subparser + + parsed = parser.parse_args() + parsed.func(parsed) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/custom_bms/README.md b/benchmarks/custom_bms/README.md new file mode 100644 index 00000000000..eea85d74fe9 --- /dev/null +++ b/benchmarks/custom_bms/README.md @@ -0,0 +1,11 @@ +# Iris custom benchmarks + +To be recognised by ASV, these benchmarks must be packaged and installed in +line with the +[ASV guidelines](https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html). +This is achieved using the custom build in [install.py](./install.py). + +Installation is into the environment where the benchmarks are run (i.e. not +the environment containing ASV + Nox, but the one built to the same +specifications as the Tests environment). This is done via `build_command` +in [asv.conf.json](../asv.conf.json). diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py new file mode 100644 index 00000000000..bda9f1cc3cd --- /dev/null +++ b/benchmarks/custom_bms/install.py @@ -0,0 +1,55 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Install the SciTools custom benchmarks for detection by ASV. 
+ +See the requirements for being detected as an ASV plugin: +https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html +""" + +from pathlib import Path +import shutil +from subprocess import run +from tempfile import TemporaryDirectory + +this_dir = Path(__file__).parent + + +def package_files(new_dir: Path) -> None: + """Package SciTools' custom benchmarks for detection by ASV. + + Parameters + ---------- + new_dir : Path + The directory to package the custom benchmarks in. + """ + asv_bench_scitools = new_dir / "asv_bench_scitools" + benchmarks = asv_bench_scitools / "benchmarks" + benchmarks.mkdir(parents=True) + (asv_bench_scitools / "__init__.py").touch() + + for py_file in this_dir.glob("*.py"): + if py_file != Path(__file__): + shutil.copy2(py_file, benchmarks) + + # Create this on the fly, as having multiple pyproject.toml files in 1 + # project causes problems. + py_project = new_dir / "pyproject.toml" + py_project.write_text( + """ + [project] + name = "asv_bench_scitools" + version = "0.1" + """ + ) + + +def main(): + with TemporaryDirectory() as temp_dir: + package_files(Path(temp_dir)) + run(["python", "-m", "pip", "install", temp_dir]) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py new file mode 100644 index 00000000000..486c67aeb99 --- /dev/null +++ b/benchmarks/custom_bms/tracemallocbench.py @@ -0,0 +1,196 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. + +"""Benchmark for growth in process resident memory, repeating for accuracy. + +Uses a modified version of the repeat logic in +:class:`asv_runner.benchmarks.time.TimeBenchmark`. 
+""" + +import re +from timeit import Timer +import tracemalloc +from typing import Callable + +from asv_runner.benchmarks.time import TimeBenchmark, wall_timer + + +class TracemallocBenchmark(TimeBenchmark): + """Benchmark for growth in process resident memory, repeating for accuracy. + + Obviously limited as to what it actually measures : Relies on the current + process not having significant unused (de-allocated) memory when the + tested codeblock runs, and only reliable when the code allocates a + significant amount of new memory. + + Benchmark operations prefixed with ``tracemalloc_`` or ``Tracemalloc`` will + use this benchmark class. + + Inherits behaviour from :class:`asv_runner.benchmarks.time.TimeBenchmark`, + with modifications for memory measurement. See the below Attributes section + and https://asv.readthedocs.io/en/stable/writing_benchmarks.html#timing-benchmarks. + + Attributes + ---------- + Mostly identical to :class:`asv_runner.benchmarks.time.TimeBenchmark`. See + https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks + Make sure to use the inherited ``repeat`` attribute if greater accuracy + is needed. Below are the attributes where inherited behaviour is + overridden. + + number : int + The number of times the benchmarked operation will be called per + ``repeat``. Memory growth is measured after ALL calls - + i.e. `number` should make no difference to the result if the operation + has perfect garbage collection. The parent class's intelligent + modification of `number` is NOT inherited. A minimum value of ``1`` is + enforced. + warmup_time, sample_time, min_run_count, timer + Not used. + type : str = "tracemalloc" + The name of this benchmark type. + unit : str = "bytes" + The units of the measured metric (i.e. the growth in memory). 
+ + """ + + name_regex = re.compile("^(Tracemalloc[A-Z_].+)|(tracemalloc_.+)$") + + param: tuple + + def __init__(self, name: str, func: Callable, attr_sources: list) -> None: + """Initialize a new instance of `TracemallocBenchmark`. + + Parameters + ---------- + name : str + The name of the benchmark. + func : callable + The function to benchmark. + attr_sources : list + A list of objects from which to draw attributes. + """ + super().__init__(name, func, attr_sources) + self.type = "tracemalloc" + self.unit = "bytes" + + def _load_vars(self): + """Load benchmark variables from attribute sources. + + Downstream handling of ``number`` is not the same as in the parent, so + need to make sure it is at least 1. + """ + super()._load_vars() + self.number = max(1, self.number) + + def run(self, *param: tuple) -> dict: + """Run the benchmark with the given parameters. + + Downstream handling of ``param`` is not the same as in the parent, so + need to store it now. + + Parameters + ---------- + *param : tuple + The parameters to pass to the benchmark function. + + Returns + ------- + dict + A dictionary with the benchmark results. It contains the samples + taken, and "the number of times the function was called in each + sample" - for this benchmark that is always ``1`` to avoid the + parent class incorrectly modifying the results. + """ + self.param = param + return super().run(*param) + + def benchmark_timing( + self, + timer: Timer, + min_repeat: int, + max_repeat: int, + max_time: float, + warmup_time: float, + number: int, + min_run_count: int, + ) -> tuple[list[int], int]: + """Benchmark the timing of the function execution. + + Heavily modified from the parent method + - Directly performs setup and measurement (parent used timeit). + - `number` used differently (see Parameters). + - No warmup phase. + + Parameters + ---------- + timer : timeit.Timer + Not used. + min_repeat : int + The minimum number of times to repeat the function execution. 
+ max_repeat : int + The maximum number of times to repeat the function execution. + max_time : float + The maximum total time to spend on the benchmarking. + warmup_time : float + Not used. + number : int + The number of times the benchmarked operation will be called per + repeat. Memory growth is measured after ALL calls - i.e. `number` + should make no difference to the result if the operation + has perfect garbage collection. The parent class's intelligent + modification of `number` is NOT inherited. + min_run_count : int + Not used. + + Returns + ------- + list + A list of the measured memory growths, in bytes. + int = 1 + Part of the inherited return signature. Must be 1 to avoid + the parent incorrectly modifying the results. + """ + start_time = wall_timer() + samples: list[int] = [] + + def too_slow(num_samples) -> bool: + """Stop taking samples if limits exceeded. + + Parameters + ---------- + num_samples : int + The number of samples taken so far. + + Returns + ------- + bool + True if the benchmark should stop, False otherwise. + """ + if num_samples < min_repeat: + return False + return wall_timer() > start_time + max_time + + # Collect samples + while len(samples) < max_repeat: + self.redo_setup() + tracemalloc.start() + for _ in range(number): + __ = self.func(*self.param) + _, peak_mem_bytes = tracemalloc.get_traced_memory() + tracemalloc.stop() + + samples.append(peak_mem_bytes) + + if too_slow(len(samples)): + break + + # ``number`` is not used in the same way as in the parent class. Must + # be returned as 1 to avoid parent incorrectly modifying the results. 
+ return samples, 1 + + +# https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html +export_as_benchmark = [TracemallocBenchmark] diff --git a/ci/requirements/locks/environment-benchmark-linux-64.lock b/ci/requirements/locks/environment-benchmark-linux-64.lock new file mode 100644 index 00000000000..c8c67b1bcf2 --- /dev/null +++ b/ci/requirements/locks/environment-benchmark-linux-64.lock @@ -0,0 +1,164 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: 98a4801aafacb13b98a04850d749d850a6ee57bf6f26506db814286373f2ebaf +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-headers-1.21.0-ha770c72_1.conda#9e298d76f543deb06eb0f3413675e13a +https://conda.anaconda.org/conda-forge/linux-64/nlohmann_json-3.12.0-h3f2d84a_0.conda#d76872d096d063e226482c99337209dc +https://conda.anaconda.org/conda-forge/noarch/nomkl-1.0-h5ca1d4c_0.tar.bz2#9a66894dfd07c4510beb6b3f9672ccc0 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-8_cp313.conda#94305520c52a4aa3f6c2b1ff6008d9f8 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.8.3-hbd8a1cb_0.conda#74784ee3d225fc3dca89edb635b4e5cc +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1423503_1.conda#0be7c6e070c19105f966d3758448d018 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.1.0-h767d61c_5.conda#dcd5ff1940cd38f6df777cac86819d60 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.1.0-h767d61c_5.conda#264fbfba7fb20acf3b29cde153e345ce +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.2-h39aace5_0.conda#791365c5f65975051e4e017b5da3abf5 
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.12.4-hb03c661_0.conda#ae5621814cb99642c9308977fe90ed0d +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda#f7f0d6cc2dc986d42ac2689ec88192be +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda#b38117a3c920364aff79f870c984b4a3 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb03c661_4.conda#1d29d2e33fe59954af82ef54a8af3fe1 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.1-hecca717_0.conda#4211416ecba1866fab0c6470986c22d6 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.1.0-h69a702a_5.conda#069afdf8ea72504e48d23ae1171d951c +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.1.0-hcea5267_5.conda#fbd4008644add05032b6764807ee2cba +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda#915f5995e94f60e9a4826e0b0920ee88 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda#c7e925f37e3b40d893459e625f6a53f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.1.0-h8f9b012_5.conda#4e02a49aaa9d5190cb630fa43528fbe6 +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.11.0-hb04c3b8_0.conda#34fb73fd2d5a613d8f17ce2eaa15a8a5 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.1-he9a06e4_0.conda#af930c65e9a79a3423d6d36e265cef65 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.3-h26f9b46_0.conda#72b3dd72e4f0b88cdacf3421313480f0 +https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda#a77f85f77be52ff59391544bfe73390a +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.9.2-he7b75e1_1.conda#c04d1312e7feec369308d656c18e7f3e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.3.1-h92c474e_6.conda#3490e744cb8b9d5a3b9785839d618a17 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.2.4-h92c474e_1.conda#4ab554b102065910f098f88b40163835 +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.2.7-h92c474e_2.conda#248831703050fe9a5b2680a7589fdba9 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda#d411fc29e338efb48c5fd4576d71d881 +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20250512.1-cxx17_hba17884_0.conda#83b160d4da3e1e847bf044997621ed63 +https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.4-h3f801dc_0.conda#01ba04e414e47f95c03d6ddd81fd37be +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb03c661_4.conda#5cb5a1c9a94a78f5b23684bcb845338d +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb03c661_4.conda#2e55011fa483edb8bfe3fd92e860cd79 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.1.0-h69a702a_5.conda#0c91408b3dec0b97e8a3c694845bd63b +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.50.4-h0c1763c_0.conda#0b367fad34931cb79e0d6b7e5c06bb1c 
+https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.1.0-h4852527_5.conda#8bba50c7f4679f08c861b597ad2bda6b +https://conda.anaconda.org/conda-forge/linux-64/libzip-1.11.2-h6991a6a_0.conda#a7b27c075c9b7f459f1c022090697cba +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.23-h8e187f5_0.conda#edd15d7a5914dc1d87617a2b7c582d23 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.2-h03e3b7b_0.conda#3d8da0248bdae970b4ade636a104b7f5 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_hd72426e_102.conda#a0116df4f4ed05c303811a837d5b39d8 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.21.2-h6252d9a_1.conda#cf5e9b21384fdb75b15faf397551c247 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda#2c2fae981fd2afd00812c92ac47d023d +https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda#ff862eebdfeb2fd048ae9dc92510baca +https://conda.anaconda.org/conda-forge/linux-64/hdf4-4.2.15-h2a13503_7.conda#bd77f8da987968ec3927990495dc22e4 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 
+https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda#b499ce4b026493a13774bcf0f4c33849 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_2.conda#dfc5aae7b043d9f56ba99514d5e60625 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-6.31.1-h9ef548d_1.conda#b92e2a26764fcadb4304add7e698ccf2 +https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2025.08.12-h7b12aa8_1.conda#0a801dabf8776bb86b12091d2f99377e +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.22.0-h454ac66_1.conda#8ed82d90e6b1686f5e98f8b7825a15ef +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.7-h2b335a9_100_cp313.conda#724dcf9960e933838247971da07fe5cf +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.5.5-h149bd38_3.conda#f9bff8c2a205ee0f28b0c61dad849a98 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.10.4-h37a7233_0.conda#d828cb0be64d51e27eebe354a2907a98 +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py313h7033f15_4.conda#bc8624c405856b1d047dd0a81829b08c +https://conda.anaconda.org/conda-forge/noarch/certifi-2025.8.3-pyhd8ed1ab_0.conda#11f59985f49df4620890f3e746ed7102 +https://conda.anaconda.org/conda-forge/noarch/click-8.2.1-pyh707e725_0.conda#94b550b8d3a614dbd326af798c7dfb40 +https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.1-pyhd8ed1ab_0.conda#364ba6c9fb03886ac979b482f39ebb92 +https://conda.anaconda.org/conda-forge/linux-64/crc32c-2.7.1-py313h54dd161_2.conda#1b52ef3cbbb8a4108c78c7a73fe31450 +https://conda.anaconda.org/conda-forge/noarch/fsspec-2025.9.0-pyhd8ed1ab_0.conda#76f492bd8ba8a0fb80ffe16fc1a75b3b +https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e +https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac 
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-35_h4a7cf45_openblas.conda#6da7e852c812a84096b68158574398d0 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.14.1-h332b0f4_0.conda#45f6713cb00f124af300342512219182 +https://conda.anaconda.org/conda-forge/linux-64/libxml2-16-2.15.0-ha9997c6_0.conda#84bed2bfefc14e4878bd16979782e522 +https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.44.0-py313hfdae721_2.conda#dd0d7947635c0c524608eab7db55dcc9 +https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py313h8060acc_1.conda#21b62c55924f01b6eef6827167b46acb +https://conda.anaconda.org/conda-forge/linux-64/msgpack-python-1.1.1-py313h7037e92_1.conda#cc41d40a7ec345da56c496767d4bb61b +https://conda.anaconda.org/conda-forge/noarch/opt_einsum-3.4.0-pyhd8ed1ab_1.conda#52919815cd35c4e1a0298af658ccda04 +https://conda.anaconda.org/conda-forge/linux-64/orc-2.2.0-h1bc01a4_0.conda#53ab33c0b0ba995d2546e54b2160f3fd +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/linux-64/psutil-7.0.0-py313h07c4f96_1.conda#5a7c24c9dc49128731ae565cf598cde4 +https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda#12c566707c80111f9799308d9e265aef +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33 +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py313h8060acc_2.conda#50992ba61a8a1f8c2d346168ae1c86df +https://conda.anaconda.org/conda-forge/linux-64/re2-2025.08.12-h5301d42_1.conda#4637c13ff87424af0f6a981ab6f5ffa5 
+https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda#3339e3b65d58accf4ca4fb8748ab16b3 +https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda#0401a17ae845fa72c7210e206ec5647d +https://conda.anaconda.org/conda-forge/noarch/tblib-3.1.0-pyhd8ed1ab_0.conda#a15c62b8a306b8978f094f76da2f903f +https://conda.anaconda.org/conda-forge/noarch/toolz-1.0.0-pyhd8ed1ab_1.conda#40d0ed782a8aaa16ef248e68c06c168d +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.2-py313h07c4f96_1.conda#45821154b9cb2fb63c2b354c76086954 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda#0caa1af407ecff61170c9437a808404d +https://conda.anaconda.org/conda-forge/linux-64/wrapt-1.17.3-py313h07c4f96_1.conda#c2662497e9a9ff2153753682f53989c9 +https://conda.anaconda.org/conda-forge/noarch/zict-3.0.0-pyhd8ed1ab_1.conda#e52c2ef711ccf31bb7f70ca87d144b9e +https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhd8ed1ab_0.conda#df5e78d904988eb55042c0c97446079f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.9.0-h0fbd49f_19.conda#24139f2990e92effbeb374a0eb33fdb1 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.13.3-h19deb91_3.conda#1680d64986f8263978c3624f677656c8 +https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.16.0-h3a458e0_1.conda#682cb082bbd998528c51f1e77d9ce415 +https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.1-py313hf01b4d8_1.conda#c4a0f01c46bc155d205694bec57bd709 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-1.0.1-py313h536fd9c_0.conda#e886bb6a3c24f8b9dd4fcd1d617a1f64 +https://conda.anaconda.org/conda-forge/noarch/deprecated-1.2.18-pyhd8ed1ab_0.conda#0cef44b1754ae4d6924ac0eef6b9fdbe +https://conda.anaconda.org/conda-forge/noarch/donfig-0.8.1.post1-pyhd8ed1ab_1.conda#c56a7fa5597ad78b62e1f5d21f7f8b8f +https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda#164fc43f0b53b6e3a7bc7dce5e4f1dc9 
+https://conda.anaconda.org/conda-forge/linux-64/hdf5-1.14.6-nompi_h6e4c0c1_103.conda#c74d83614aec66227ae5199d98852aaf +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.7.0-pyhe01879c_1.conda#63ccfdc3a3ce25b027b8767eb722fca8 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-35_h0358290_openblas.conda#8aa3389d36791ecd31602a247b1f3641 +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.73.1-h1e535eb_0.conda#8075d8550f773a17288c7ec2cf2f2d56 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-35_h47877c9_openblas.conda#aa0b36b71d44f74686f13b9bfabec891 +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.15.0-h26afc86_0.conda#c52b54db4660b44ca75b6a61c533b9f5 +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda#0badf9c54e24cecfb0ad2f99d680c163 +https://conda.anaconda.org/conda-forge/linux-64/prometheus-cpp-1.3.0-ha5d0236_0.conda#a83f6a2fdc079e643237887a37460668 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda#5b8d21249ff20967101ffa321cab24e8 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.8.6-h800fcd2_2.conda#50e0900a33add0c715f17648de6be786 +https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.12.0-ha729027_0.conda#3dab8d6fa3d10fe4104f1fbe59c10176 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.10.0-h4bb41a7_3.conda#1efaf34774bfb92ecf2fa8fa985b2752 +https://conda.anaconda.org/conda-forge/noarch/dask-core-2025.9.1-pyhcf101f3_0.conda#c49de33395d775a92ea90e0cb34c3577 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.39.0-hdb79228_0.conda#a2e30ccd49f753fd30de0d30b1569789 +https://conda.anaconda.org/conda-forge/linux-64/libnetcdf-4.9.3-nompi_h11f7409_103.conda#3ccff1066c05a1e6c221356eecc40581 
+https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-1.21.0-hb9b0907_1.conda#1c0320794855f457dea27d35c4c71e23 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py313h17eae1a_0.conda#7a2d2f9adecd86ed5c29c2115354f615 +https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.25.0-py313h54dd161_0.conda#1fe43bd1fc86e22ad3eb0edec637f8a2 +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.33.1-hb4fd278_2.conda#81c545e27e527ca1be0cc04b74c20386 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.14.0-hb1c9500_1.conda#30da390c211967189c58f83ab58a6f0c +https://conda.anaconda.org/conda-forge/linux-64/bottleneck-1.6.0-py313h29aa505_0.conda#02405ff909c10e59bf13527f8df3910c +https://conda.anaconda.org/conda-forge/linux-64/cftime-1.6.4-py313h29aa505_2.conda#1363e8db910e403edc8fd486f8470ec6 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.39.0-hdbdcf42_0.conda#bd21962ff8a9d1ce4720d42a35a4af40 +https://conda.anaconda.org/conda-forge/linux-64/numba-0.61.2-py313h50b8c88_1.conda#53c79b7cdee329ed4c77cafe27600cdb +https://conda.anaconda.org/conda-forge/linux-64/numcodecs-0.16.1-py313h08cd8bf_1.conda#5c1c296392a81820e2332b3315f58b66 +https://conda.anaconda.org/conda-forge/linux-64/numexpr-2.12.1-py313h24ae7f9_100.conda#8e5d3d84d8091537034c021420853613 +https://conda.anaconda.org/conda-forge/noarch/numpy_groupies-0.11.3-pyhd8ed1ab_0.conda#5402c2b046432ceb2d192a82802e7854 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.3.2-py313h08cd8bf_0.conda#5f4cc42e08d6d862b7b919a3c8959e0b +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.16.2-py313h11c21cd_0.conda#85a80978a04be9c290b8fe6d9bccff1c +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.5.0-pyhd8ed1ab_0.conda#436c165519e140cb08d246a4472a9d6a +https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.606-h31ade35_1.conda#e33b3d2a2d44ba0fb35373d2343b71dd 
+https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.12.0-h8b27e44_3.conda#7b738aea4f1b8ae2d1118156ad3ae993 +https://conda.anaconda.org/conda-forge/noarch/distributed-2025.9.1-pyhcf101f3_0.conda#f140b63da44c9a3fc7ae75cb9cc53c47 +https://conda.anaconda.org/conda-forge/noarch/flox-0.10.6-pyhd8ed1ab_0.conda#40136da5d8e93ccbd406518154763fd9 +https://conda.anaconda.org/conda-forge/linux-64/netcdf4-1.7.2-nompi_py313hfae5b86_104.conda#b6ddba788230a41a534cf288d41a1df4 +https://conda.anaconda.org/conda-forge/noarch/numbagg-0.9.2-pyhd8ed1ab_0.conda#5e01f678d82477576cb4d56cc6e9357f +https://conda.anaconda.org/conda-forge/noarch/sparse-0.17.0-pyhcf101f3_0.conda#1b59de14a7e5888f939611e1fe329e00 +https://conda.anaconda.org/conda-forge/noarch/zarr-3.1.2-pyhcf101f3_0.conda#2bdb3950ea64a365bfe9e6414e748a9b +https://conda.anaconda.org/conda-forge/linux-64/libarrow-21.0.0-hb708d0b_3_cpu.conda#2d0305c8802fcba095d8d4e14e66ed3b +https://conda.anaconda.org/conda-forge/linux-64/libarrow-compute-21.0.0-h8c2c5c3_3_cpu.conda#b0b73752adfcbe6b73ef9f2eb5d5cf03 +https://conda.anaconda.org/conda-forge/linux-64/libparquet-21.0.0-h790f06f_3_cpu.conda#0568ba99a1f6c0ef7a04ca23dc78905a +https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-21.0.0-h635bf11_3_cpu.conda#12fe67afbd946adae49856b275478d0f +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-21.0.0-py313he109ebe_0_cpu.conda#3018b7f30825c21c47a7a1e061459f96 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-21.0.0-h635bf11_3_cpu.conda#630dfffcaf67b800607164d4b5b08bf7 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-21.0.0-h3f74fd7_3_cpu.conda#595ca398ad8dcac76a315f358e3312a6 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-21.0.0-py313h78bf25f_0.conda#1580ddd94606ccb60270877cb8838562 diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000000..a90ab3bf304 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,292 @@ +"""Perform test automation with 
nox. + +For further details, see https://nox.thea.codes/en/stable/# + +""" + +import hashlib +import os +from pathlib import Path + +import nox +from nox.logger import logger + +#: Default to reusing any pre-existing nox environments. +nox.options.reuse_existing_virtualenvs = True + +#: Python versions we can run sessions under +_PY_VERSIONS_ALL = ["3.11", "3.12", "3.13"] +_PY_VERSION_LATEST = _PY_VERSIONS_ALL[-1] + +#: One specific python version for docs builds +_PY_VERSION_DOCSBUILD = _PY_VERSION_LATEST + +#: Cirrus-CI environment variable hook. +PY_VER = os.environ.get("PY_VER", _PY_VERSIONS_ALL) + +#: Default cartopy cache directory. +CARTOPY_CACHE_DIR = os.environ.get("HOME") / Path(".local/share/cartopy") + +# https://github.com/numpy/numpy/pull/19478 +# https://github.com/matplotlib/matplotlib/pull/22099 +#: Common session environment variables. +ENV = dict(NPY_DISABLE_CPU_FEATURES="AVX512F,AVX512CD,AVX512_SKX") + + +def session_lockfile(session: nox.sessions.Session) -> Path: + """Return the path of the session lockfile.""" + # return Path(f"ci/requirements/locks/py{session.python.replace('.', '')}-linux-64.lock") + return Path(f"ci/requirements/locks/environment-benchmark-linux-64.lock") + + +def session_cachefile(session: nox.sessions.Session) -> Path: + """Return the path of the session lockfile cache.""" + lockfile = session_lockfile(session) + tmp_dir = Path(session.create_tmp()) + cache = tmp_dir / lockfile.name + return cache + + +def venv_populated(session: nox.sessions.Session) -> bool: + """List of packages in the lockfile installed. + + Returns True if the conda venv has been created. + """ + return session_cachefile(session).is_file() + + +def venv_changed(session: nox.sessions.Session) -> bool: + """Return True if the installed session is different. + + Compares to that specified in the lockfile. 
+ """ + changed = False + cache = session_cachefile(session) + lockfile = session_lockfile(session) + if cache.is_file(): + with open(lockfile, "rb") as fi: + expected = hashlib.sha256(fi.read()).hexdigest() + with open(cache, "r") as fi: + actual = fi.read() + changed = actual != expected + return changed + + +def cache_venv(session: nox.sessions.Session) -> None: + """Cache the nox session environment. + + This consists of saving a hexdigest (sha256) of the associated + conda lock file. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + lockfile = session_lockfile(session) + cache = session_cachefile(session) + with open(lockfile, "rb") as fi: + hexdigest = hashlib.sha256(fi.read()).hexdigest() + with open(cache, "w") as fout: + fout.write(hexdigest) + + +def cache_cartopy(session: nox.sessions.Session) -> None: + """Determine whether to cache the cartopy natural earth shapefiles. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + if not CARTOPY_CACHE_DIR.is_dir(): + session.run_always( + "python", + "-c", + "import cartopy; cartopy.io.shapereader.natural_earth()", + ) + + +def prepare_venv(session: nox.sessions.Session) -> None: + """Create and cache the nox session conda environment. + + Additionally provide conda environment package details and info. + + Note that, iris is installed into the environment using pip. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. 
+ + Notes + ----- + See + - https://github.com/theacodes/nox/issues/346 + - https://github.com/theacodes/nox/issues/260 + + """ + lockfile = session_lockfile(session) + venv_dir = session.virtualenv.location_name + + if not venv_populated(session): + # environment has been created but packages not yet installed + # populate the environment from the lockfile + logger.debug(f"Populating conda env at {venv_dir} using {lockfile}") + session.conda_install("--file", str(lockfile)) + cache_venv(session) + + elif venv_changed(session): + # destroy the environment and rebuild it + logger.debug(f"Lockfile changed. Re-creating conda env at {venv_dir}") + _re_orig = session.virtualenv.reuse_existing + session.virtualenv.reuse_existing = False + session.virtualenv.create() + session.conda_install("--file", str(lockfile)) + session.virtualenv.reuse_existing = _re_orig + cache_venv(session) + + logger.debug(f"Environment {venv_dir} is up to date") + + # cache_cartopy(session) + + # Determine whether verbose diagnostics have been requested + # from the command line. + verbose = "-v" in session.posargs or "--verbose" in session.posargs + + if verbose: + session.run_always("conda", "info") + session.run_always("conda", "list", f"--prefix={venv_dir}") + session.run_always( + "conda", + "list", + f"--prefix={venv_dir}", + "--explicit", + ) + + +@nox.session(python=PY_VER, venv_backend="conda") +def tests(session: nox.sessions.Session): + """Perform iris system, integration and unit tests. + + Coverage testing is enabled if the "--coverage" or "-c" flag is used. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. 
+ + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + run_args = [ + "pytest", + "-n", + "auto", + "lib/iris/tests", + ] + if "-c" in session.posargs or "--coverage" in session.posargs: + run_args[-1:-1] = ["--cov=lib/iris", "--cov-report=xml"] + session.run(*run_args) + + +@nox.session(python=_PY_VERSION_DOCSBUILD, venv_backend="conda") +def doctest(session: nox.sessions.Session): + """Perform iris doctests and gallery. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + session.cd("docs") + session.run( + "make", + "clean", + "html", + external=True, + ) + session.run( + "make", + "doctest", + external=True, + ) + + +@nox.session(python=_PY_VERSION_DOCSBUILD, venv_backend="conda") +def gallery(session: nox.sessions.Session): + """Perform iris gallery doc-tests. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + session.run( + "pytest", + "-n", + "auto", + "docs/gallery_tests", + ) + + +@nox.session(python=PY_VER, venv_backend="conda") +def wheel(session: nox.sessions.Session): + """Perform iris local wheel install and import test. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.cd("dist") + fname = list(Path(".").glob("scitools_iris-*.whl")) + if len(fname) == 0: + raise ValueError("Cannot find wheel to install.") + if len(fname) > 1: + emsg = f"Expected to find 1 wheel to install, found {len(fname)} instead." 
+ raise ValueError(emsg) + session.install(fname[0].name) + session.run( + "python", + "-c", + "import iris; print(f'{iris.__version__=}')", + external=True, + ) + + +@nox.session +def benchmarks(session: nox.sessions.Session): + """Run the Iris benchmark runner. Run session with `-- --help` for help. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + if len(session.posargs) == 0: + message = ( + "This session MUST be run with at least one argument. The " + "arguments are passed down to the benchmark runner script. E.g:\n" + "nox -s benchmarks -- --help\n" + "nox -s benchmarks -- something --help\n" + "nox -s benchmarks -- something\n" + ) + session.error(message) + session.install("asv", "nox") + bm_runner_path = Path(__file__).parent / "benchmarks" / "bm_runner.py" + session.run("python", bm_runner_path, *session.posargs)