From 345c4d95faf89717d8bfda29f233f6384f60d254 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:20:54 +0200 Subject: [PATCH 01/22] Try Iris benchmarks CI --- .github/workflows/benchmarks_report.yml | 83 ++ .github/workflows/benchmarks_run.yml | 171 ++++ .github/workflows/benchmarks_validate.yml | 48 ++ benchmarks/README.md | 175 ++++ benchmarks/_asv_delegated_abc.py | 249 ++++++ benchmarks/asv.conf.json | 27 + benchmarks/asv_delegated.py | 152 ++++ benchmarks/benchmarks/__init__.py | 74 ++ benchmarks/benchmarks/accessors.py | 25 + benchmarks/benchmarks/alignment.py | 54 ++ benchmarks/benchmarks/coding.py | 18 + benchmarks/benchmarks/combine.py | 79 ++ benchmarks/benchmarks/dataarray_missing.py | 72 ++ benchmarks/benchmarks/dataset.py | 32 + benchmarks/benchmarks/dataset_io.py | 755 ++++++++++++++++++ benchmarks/benchmarks/datatree.py | 15 + benchmarks/benchmarks/groupby.py | 191 +++++ benchmarks/benchmarks/import.py | 18 + benchmarks/benchmarks/indexing.py | 201 +++++ benchmarks/benchmarks/interp.py | 65 ++ benchmarks/benchmarks/merge.py | 77 ++ benchmarks/benchmarks/pandas.py | 64 ++ benchmarks/benchmarks/polyfit.py | 38 + benchmarks/benchmarks/reindexing.py | 52 ++ benchmarks/benchmarks/renaming.py | 27 + benchmarks/benchmarks/repr.py | 87 ++ benchmarks/benchmarks/rolling.py | 142 ++++ benchmarks/benchmarks/unstacking.py | 64 ++ benchmarks/benchmarks_iris/__init__.py | 76 ++ .../benchmarks_iris/aggregate_collapse.py | 212 +++++ benchmarks/benchmarks_iris/cperf/__init__.py | 92 +++ benchmarks/benchmarks_iris/cperf/equality.py | 55 ++ benchmarks/benchmarks_iris/cperf/load.py | 55 ++ benchmarks/benchmarks_iris/cperf/save.py | 40 + benchmarks/benchmarks_iris/cube.py | 116 +++ .../benchmarks_iris/generate_data/__init__.py | 135 ++++ .../benchmarks_iris/generate_data/stock.py | 184 +++++ .../benchmarks_iris/generate_data/ugrid.py | 190 +++++ .../benchmarks_iris/generate_data/um_files.py | 198 +++++ benchmarks/benchmarks_iris/import_iris.py | 278 +++++++ benchmarks/benchmarks_iris/iterate.py | 26 + benchmarks/benchmarks_iris/load/__init__.py | 221 +++++ benchmarks/benchmarks_iris/load/ugrid.py | 115 +++ benchmarks/benchmarks_iris/merge_concat.py | 72 ++ benchmarks/benchmarks_iris/mesh/__init__.py | 5 + .../benchmarks_iris/mesh/utils/__init__.py | 5 + .../mesh/utils/regions_combine.py | 227 ++++++ benchmarks/benchmarks_iris/plot.py | 34 + benchmarks/benchmarks_iris/regridding.py | 119 +++ benchmarks/benchmarks_iris/save.py | 43 + benchmarks/benchmarks_iris/sperf/__init__.py | 38 + .../benchmarks_iris/sperf/combine_regions.py | 234 ++++++ benchmarks/benchmarks_iris/sperf/equality.py | 35 + benchmarks/benchmarks_iris/sperf/load.py | 27 + benchmarks/benchmarks_iris/sperf/save.py | 50 ++ benchmarks/benchmarks_iris/stats.py | 52 ++ benchmarks/benchmarks_iris/trajectory.py | 56 ++ .../unit_style/__init__disabled.py | 16 + .../benchmarks_iris/unit_style/aux_factory.py | 52 ++ .../benchmarks_iris/unit_style/coords.py | 129 +++ benchmarks/benchmarks_iris/unit_style/cube.py | 252 ++++++ benchmarks/benchmarks_iris/unit_style/mesh.py | 187 +++++ .../unit_style/metadata_manager_factory.py | 83 ++ .../benchmarks_iris/unit_style/mixin.py | 78 ++ benchmarks/bm_runner.py | 739 +++++++++++++++++ benchmarks/custom_bms/README.md | 11 + benchmarks/custom_bms/install.py | 55 ++ benchmarks/custom_bms/tracemallocbench.py | 196 +++++ 68 files changed, 7843 insertions(+) create mode 100644 .github/workflows/benchmarks_report.yml create mode 100644 
.github/workflows/benchmarks_run.yml create mode 100644 .github/workflows/benchmarks_validate.yml create mode 100644 benchmarks/README.md create mode 100644 benchmarks/_asv_delegated_abc.py create mode 100644 benchmarks/asv.conf.json create mode 100644 benchmarks/asv_delegated.py create mode 100644 benchmarks/benchmarks/__init__.py create mode 100644 benchmarks/benchmarks/accessors.py create mode 100644 benchmarks/benchmarks/alignment.py create mode 100644 benchmarks/benchmarks/coding.py create mode 100644 benchmarks/benchmarks/combine.py create mode 100644 benchmarks/benchmarks/dataarray_missing.py create mode 100644 benchmarks/benchmarks/dataset.py create mode 100644 benchmarks/benchmarks/dataset_io.py create mode 100644 benchmarks/benchmarks/datatree.py create mode 100644 benchmarks/benchmarks/groupby.py create mode 100644 benchmarks/benchmarks/import.py create mode 100644 benchmarks/benchmarks/indexing.py create mode 100644 benchmarks/benchmarks/interp.py create mode 100644 benchmarks/benchmarks/merge.py create mode 100644 benchmarks/benchmarks/pandas.py create mode 100644 benchmarks/benchmarks/polyfit.py create mode 100644 benchmarks/benchmarks/reindexing.py create mode 100644 benchmarks/benchmarks/renaming.py create mode 100644 benchmarks/benchmarks/repr.py create mode 100644 benchmarks/benchmarks/rolling.py create mode 100644 benchmarks/benchmarks/unstacking.py create mode 100644 benchmarks/benchmarks_iris/__init__.py create mode 100644 benchmarks/benchmarks_iris/aggregate_collapse.py create mode 100644 benchmarks/benchmarks_iris/cperf/__init__.py create mode 100644 benchmarks/benchmarks_iris/cperf/equality.py create mode 100644 benchmarks/benchmarks_iris/cperf/load.py create mode 100644 benchmarks/benchmarks_iris/cperf/save.py create mode 100644 benchmarks/benchmarks_iris/cube.py create mode 100644 benchmarks/benchmarks_iris/generate_data/__init__.py create mode 100644 benchmarks/benchmarks_iris/generate_data/stock.py create mode 100644 benchmarks/benchmarks_iris/generate_data/ugrid.py create mode 100644 benchmarks/benchmarks_iris/generate_data/um_files.py create mode 100644 benchmarks/benchmarks_iris/import_iris.py create mode 100644 benchmarks/benchmarks_iris/iterate.py create mode 100644 benchmarks/benchmarks_iris/load/__init__.py create mode 100644 benchmarks/benchmarks_iris/load/ugrid.py create mode 100644 benchmarks/benchmarks_iris/merge_concat.py create mode 100644 benchmarks/benchmarks_iris/mesh/__init__.py create mode 100644 benchmarks/benchmarks_iris/mesh/utils/__init__.py create mode 100644 benchmarks/benchmarks_iris/mesh/utils/regions_combine.py create mode 100644 benchmarks/benchmarks_iris/plot.py create mode 100644 benchmarks/benchmarks_iris/regridding.py create mode 100644 benchmarks/benchmarks_iris/save.py create mode 100644 benchmarks/benchmarks_iris/sperf/__init__.py create mode 100644 benchmarks/benchmarks_iris/sperf/combine_regions.py create mode 100644 benchmarks/benchmarks_iris/sperf/equality.py create mode 100644 benchmarks/benchmarks_iris/sperf/load.py create mode 100644 benchmarks/benchmarks_iris/sperf/save.py create mode 100644 benchmarks/benchmarks_iris/stats.py create mode 100644 benchmarks/benchmarks_iris/trajectory.py create mode 100644 benchmarks/benchmarks_iris/unit_style/__init__disabled.py create mode 100644 benchmarks/benchmarks_iris/unit_style/aux_factory.py create mode 100644 benchmarks/benchmarks_iris/unit_style/coords.py create mode 100644 benchmarks/benchmarks_iris/unit_style/cube.py create mode 100644 
benchmarks/benchmarks_iris/unit_style/mesh.py create mode 100644 benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py create mode 100644 benchmarks/benchmarks_iris/unit_style/mixin.py create mode 100644 benchmarks/bm_runner.py create mode 100644 benchmarks/custom_bms/README.md create mode 100644 benchmarks/custom_bms/install.py create mode 100644 benchmarks/custom_bms/tracemallocbench.py diff --git a/.github/workflows/benchmarks_report.yml b/.github/workflows/benchmarks_report.yml new file mode 100644 index 00000000000..1de0f34b4c4 --- /dev/null +++ b/.github/workflows/benchmarks_report.yml @@ -0,0 +1,83 @@ +# Post any reports generated by benchmarks_run.yml . +# Separated for security: +# https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + +name: benchmarks-report +run-name: Report benchmark results + +on: + workflow_run: + workflows: [benchmarks-run] + types: + - completed + +jobs: + download: + runs-on: ubuntu-latest + outputs: + reports_exist: ${{ steps.unzip.outputs.reports_exist }} + steps: + - name: Download artifact + id: download-artifact + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#using-data-from-the-triggering-workflow + uses: actions/github-script@v7 + with: + script: | + let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => { + return artifact.name == "benchmark_reports" + })[0]; + if (typeof matchArtifact != 'undefined') { + let download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + let fs = require('fs'); + fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/benchmark_reports.zip`, Buffer.from(download.data)); + }; + + - name: Unzip artifact + id: unzip + run: | + if test -f "benchmark_reports.zip"; then + reports_exist=1 + unzip benchmark_reports.zip -d benchmark_reports + else + reports_exist=0 + fi + echo "reports_exist=$reports_exist" >> "$GITHUB_OUTPUT" + + - name: Store artifact + uses: actions/upload-artifact@v4 + with: + name: benchmark_reports + path: benchmark_reports + + post_reports: + runs-on: ubuntu-latest + needs: download + if: needs.download.outputs.reports_exist == 1 + steps: + - name: Checkout repo + uses: actions/checkout@v5 + + - name: Download artifact + uses: actions/download-artifact@v5 + with: + name: benchmark_reports + path: .github/workflows/benchmark_reports + + - name: Set up Python + # benchmarks/bm_runner.py only needs builtins to run. + uses: actions/setup-python@v5 + + - name: Post reports + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: benchmarks/bm_runner.py _gh_post diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml new file mode 100644 index 00000000000..8b1966c0377 --- /dev/null +++ b/.github/workflows/benchmarks_run.yml @@ -0,0 +1,171 @@ +# Use ASV to check for performance regressions, either: +# - In the last 24 hours' commits. +# - Introduced by this pull request. + +name: benchmarks-run +run-name: Run benchmarks + +on: + schedule: + # Runs every day at 23:00. + - cron: "0 23 * * *" + workflow_dispatch: + inputs: + first_commit: + description: "First commit to benchmark (see bm_runner.py > Overnight)." 
+ required: false + type: string + pull_request: + # Add the `labeled` type to the default list. + types: [labeled, opened, synchronize, reopened] + +jobs: + pre-checks: + # This workflow supports two different scenarios (overnight and branch). + # The pre-checks job determines which scenario is being run. + runs-on: ubuntu-latest + if: github.repository == 'SciTools/iris' + outputs: + overnight: ${{ steps.overnight.outputs.check }} + branch: ${{ steps.branch.outputs.check }} + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 2 + - id: files-changed + uses: marceloprado/has-changed-path@df1b7a3161b8fb9fd8c90403c66a9e66dfde50cb + with: + # SEE ALSO .github/labeler.yml . + paths: requirements/locks/*.lock + - id: overnight + name: Check overnight scenario + if: github.event_name != 'pull_request' + run: echo "check=true" >> "$GITHUB_OUTPUT" + - id: branch + name: Check branch scenario + if: > + github.event_name == 'pull_request' + && + ( + steps.files-changed.outputs.changed == 'true' + || + github.event.label.name == 'benchmark_this' + ) + run: echo "check=true" >> "$GITHUB_OUTPUT" + + + benchmark: + runs-on: ubuntu-latest + needs: pre-checks + if: > + needs.pre-checks.outputs.overnight == 'true' || + needs.pre-checks.outputs.branch == 'true' + + env: + IRIS_TEST_DATA_LOC_PATH: benchmarks + IRIS_TEST_DATA_PATH: benchmarks/iris-test-data + IRIS_TEST_DATA_VERSION: "2.28" + # Lets us manually bump the cache to rebuild + ENV_CACHE_BUILD: "0" + TEST_DATA_CACHE_BUILD: "2" + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - name: Checkout repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Install run dependencies + run: pip install asv nox!=2025.05.01 + + - name: Cache environment directories + id: cache-env-dir + uses: actions/cache@v4 + with: + path: | + .nox + benchmarks/.asv/env + $CONDA/pkgs + key: ${{ runner.os }}-${{ hashFiles('requirements/') }}-${{ env.ENV_CACHE_BUILD }} + + - name: Cache test data directory + id: cache-test-data + uses: actions/cache@v4 + with: + path: | + ${{ env.IRIS_TEST_DATA_PATH }} + key: + test-data-${{ env.IRIS_TEST_DATA_VERSION }}-${{ env.TEST_DATA_CACHE_BUILD }} + + - name: Fetch the test data + if: steps.cache-test-data.outputs.cache-hit != 'true' + run: | + wget --quiet https://github.com/SciTools/iris-test-data/archive/v${IRIS_TEST_DATA_VERSION}.zip -O iris-test-data.zip + unzip -q iris-test-data.zip + mkdir --parents ${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_LOC_PATH} + mv iris-test-data-${IRIS_TEST_DATA_VERSION} ${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH} + + - name: Set test data var + run: | + echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV + + - name: Benchmark this pull request + # If the 'branch' condition(s) are met: use the bm_runner to compare + # the proposed merge with the base branch. + if: needs.pre-checks.outputs.branch == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.number }} + run: | + nox -s benchmarks -- branch origin/${{ github.base_ref }} + + - name: Run overnight benchmarks + # If the 'overnight' condition(s) are met: use the bm_runner to compare + # each of the last 24 hours' commits to their parents. + id: overnight + if: needs.pre-checks.outputs.overnight == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # The first_commit argument allows a custom starting point - useful + # for manual re-running. 
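+        # (inputs.first_commit is empty unless the workflow was dispatched manually.)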
+ run: | + first_commit=${{ inputs.first_commit }} + if [ "$first_commit" == "" ] + then + first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1) + fi + + if [ "$first_commit" != "" ] + then + nox -s benchmarks -- overnight $first_commit + fi + + - name: Warn of failure + # The overnight run is not on a pull request, so a failure could go + # unnoticed without being actively advertised. + if: > + failure() && + steps.overnight.outcome == 'failure' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + title="Overnight benchmark workflow failed: \`${{ github.run_id }}\`" + body="Generated by GHA run [\`${{github.run_id}}\`](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})" + gh issue create --title "$title" --body "$body" --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY + + - name: Upload any benchmark reports + # Uploading enables more downstream processing e.g. posting a PR comment. + if: success() || steps.overnight.outcome == 'failure' + uses: actions/upload-artifact@v4 + with: + name: benchmark_reports + path: .github/workflows/benchmark_reports + + - name: Archive asv results + # Store the raw ASV database(s) to help manual investigations. + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: asv-raw-results + path: benchmarks/.asv/results diff --git a/.github/workflows/benchmarks_validate.yml b/.github/workflows/benchmarks_validate.yml new file mode 100644 index 00000000000..e3f090b32c0 --- /dev/null +++ b/.github/workflows/benchmarks_validate.yml @@ -0,0 +1,48 @@ +name: benchmarks-validate +run-name: Validate the benchmarking setup + +on: + push: + branches: + - "main" + - "v*x" + tags: + - "v*" + pull_request: + branches: + - "*" + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + validate: + runs-on: ubuntu-latest + + env: + # Lets us manually bump the cache to rebuild + ENV_CACHE_BUILD: "0" + + steps: + - name: Checkout repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Install run dependencies + run: pip install asv nox!=2025.05.01 + + - name: Cache environment directories + id: cache-env-dir + uses: actions/cache@v4 + with: + path: | + .nox + benchmarks/.asv/env + $CONDA/pkgs + key: ${{ runner.os }}-${{ hashFiles('requirements/') }}-${{ env.ENV_CACHE_BUILD }} + + - name: Validate setup + run: nox -s benchmarks -- validate diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000000..09ea920176f --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,175 @@ +# SciTools Performance Benchmarking + +SciTools uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv) +(ASV) setup to benchmark performance. This is primarily designed to check for +performance shifts between commits using statistical analysis, but can also +be easily repurposed for manual comparative and scalability analyses. + +The benchmarks are automatically run overnight +[by a GitHub Action](../.github/workflows/benchmark.yml), with any notable +shifts in performance being flagged in a new GitHub issue. + +## Running benchmarks + +On GitHub: a Pull Request can be benchmarked by adding the +https://github.com/SciTools/iris/labels/benchmark_this +label to the PR (to run a second time: just remove and re-add the label). +Note that a benchmark run could take an hour or more to complete. 
+This runs a comparison between the PR branch's ``HEAD`` and its merge-base with
+the PR's base branch, thus showing performance differences introduced
+by the PR. (This run is managed by
+[the aforementioned GitHub Action](../.github/workflows/benchmarks_run.yml)).
+
+To run locally: the **benchmark runner** provides conveniences for
+common benchmark setup and run tasks, including replicating the benchmarking
+performed by GitHub Actions workflows. This can be accessed by:
+
+- The Nox `benchmarks` session (use
+  `nox -s benchmarks -- --help` for details).
+- `benchmarks/bm_runner.py` (use the `--help` argument for details).
+- Directly running `asv` commands from the `benchmarks/` directory (check
+  whether environment setup has any extra dependencies - see
+  [Benchmark environments](#benchmark-environments)).
+
+### Reducing run time
+
+A significant portion of benchmark run time is environment management. Run-time
+can be reduced by co-locating the benchmark environment and your
+[Conda package cache](https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/custom-env-and-pkg-locations.html)
+on the same [file system](https://en.wikipedia.org/wiki/File_system), if they
+are not already. This can be done in several ways:
+
+- Temporarily reconfiguring `env_parent` in
+  [`_asv_delegated_abc`](_asv_delegated_abc.py) to reference a location on the same
+  file system as the Conda package cache.
+- Using an alternative Conda package cache location during the benchmark run,
+  e.g. via the `$CONDA_PKGS_DIRS` environment variable.
+- Moving your repo checkout to the same file system as the Conda package cache.
+
+### Environment variables
+
+* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
+`iris-test-data` content, and your local `site.cfg` is not available for
+benchmark scripts. The benchmark runner defers to any value already set in
+the shell, but will otherwise download `iris-test-data` and set the variable
+accordingly.
+* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
+used to generate benchmark test objects/files; see
+[Data generation](#data-generation). The benchmark runner sets this
+automatically, but will defer to any value already set in the shell. Note that
+[Mule](https://github.com/metomi/mule) will be automatically installed into
+this environment, and sometimes
+[iris-test-data](https://github.com/SciTools/iris-test-data) (see
+`OVERRIDE_TEST_DATA_REPOSITORY`).
+* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
+test data, which the benchmark scripts will create if it doesn't already
+exist. Defaults to `<root>/benchmarks/.data/` if not set. Note that some of
+the generated files, especially in the 'SPerf' suite, are many GB in size, so
+plan accordingly.
+* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
+decorated with `@on_demand_benchmark` are included in the ASV run. Usually
+coupled with the ASV `--bench` argument to only run the benchmark(s) of
+interest. Is set during the benchmark runner `cperf` and `sperf` sub-commands.
+* `ASV_COMMIT_ENVS` - optional - when set (to any value), instructs the
+[delegated environment management](#benchmark-environments) to create a
+dedicated environment for each commit being benchmarked.
+This means that benchmarking commits with different environment
+requirements will not be delayed by repeated environment setup - especially
+relevant given the [benchmark runner](bm_runner.py)'s use of
+[--interleave-rounds](https://asv.readthedocs.io/en/stable/commands.html?highlight=interleave-rounds#asv-run),
+or any time you know you will repeatedly benchmark the same commit. **NOTE:**
+SciTools environments tend to be large, so this option can consume a lot of
+disk space.
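+
+For illustration only, the sketch below shows how benchmark or data-generation
+code could consult these variables; `benchmark_data_dir` is a hypothetical
+helper, not part of this suite, and its default path simply mirrors the
+documented one.
+
+```python
+# Hypothetical helper - a sketch only, not part of the benchmark suite.
+import os
+from pathlib import Path
+
+
+def benchmark_data_dir() -> Path:
+    """Resolve the synthetic-data directory, honouring BENCHMARK_DATA."""
+    default = Path(__file__).parent / ".data"
+    path = Path(os.environ.get("BENCHMARK_DATA", default))
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+# For ON_DEMAND_BENCHMARKS, only presence matters - the value is ignored.
+ON_DEMAND = "ON_DEMAND_BENCHMARKS" in os.environ
+```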
+
+## Writing benchmarks
+
+[See the ASV docs](https://asv.readthedocs.io/) for full detail.
+
+### What benchmarks to write
+
+It is not possible to maintain a full suite of 'unit style' benchmarks:
+
+* Benchmarks take longer to run than tests.
+* Small benchmarks are more vulnerable to noise - they report a lot of false
+positive regressions.
+
+We therefore recommend writing benchmarks representing scripts or single
+operations that are likely to be run at the user level.
+
+The drawback of this approach: a reported regression is less likely to reveal
+the root cause (e.g. if a commit caused a regression in coordinate-creation
+time, but the only benchmark covering this was for file-loading). Be prepared
+for manual investigations, and consider committing any useful benchmarks as
+[on-demand benchmarks](#on-demand-benchmarks) for future developers to use.
+
+### Data generation
+
+**Important:** be sure not to use the benchmarking environment to generate any
+test objects/files, as this environment changes with each commit being
+benchmarked, creating inconsistent benchmark 'conditions'. The
+[generate_data](./benchmarks/generate_data/__init__.py) module offers a
+solution; read more detail there.
+
+### ASV re-run behaviour
+
+Note that ASV re-runs a benchmark multiple times between calls to its
+`setup()` routine.
+This is a problem for benchmarking certain SciTools operations such as data
+realisation, since the data will no longer be lazy after the first run.
+Consider writing extra steps to restore objects' original state _within_ the
+benchmark itself.
+
+If adding steps to the benchmark will skew the result too much then re-running
+can be disabled by setting an attribute on the benchmark: `number = 1`. To
+maintain result accuracy this should be accompanied by increasing the number of
+repeats _between_ `setup()` calls using the `repeat` attribute.
+`warmup_time = 0` is also advisable since ASV performs independent re-runs to
+estimate run-time, and these will still be subject to the original problem.
+The `@disable_repeat_between_setup` decorator in
+[`benchmarks/__init__.py`](benchmarks/__init__.py) offers a convenience for
+all this.
+
+### Custom benchmarks
+
+SciTools benchmarking implements custom benchmark types, such as a `tracemalloc`
+benchmark to measure memory growth. See [custom_bms/](./custom_bms) for more
+detail.
+
+### Scaling / non-Scaling Performance Differences
+
+**(We no longer advocate the below for benchmarks run during CI, given the
+limited available runtime and risk of false-positives. It remains useful for
+manual investigations).**
+
+When comparing performance between commits, file types, or other variables, it
+can be helpful to know if the differences exist in scaling or non-scaling parts
+of the operation under test. This can be done using a size parameter, setting
+one value to be as small as possible (e.g. a scalar value), and the other to
+be significantly larger (e.g. a 1000x1000 array), as sketched below.
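+
+The sketch assumes a placeholder operation (summing an array) standing in for
+whatever is actually under test; neither the class nor the operation is part
+of this suite.
+
+```python
+# Sketch of a scaling-style ASV benchmark with one small and one large size.
+import numpy as np
+
+
+class HypotheticalScaling:
+    # One as-small-as-possible size and one significantly larger size.
+    params = [1, 1000]
+    param_names = ["side_length"]
+
+    def setup(self, side_length):
+        self.data = np.zeros((side_length, side_length))
+
+    def time_operation(self, side_length):
+        # Placeholder for the real operation under test.
+        self.data.sum()
+```
+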
Performance differences +might only be seen for the larger value, or the smaller, or both, getting you +closer to the root cause. + +### On-demand benchmarks + +Some benchmarks provide useful insight but are inappropriate to be included in +a benchmark run by default, e.g. those with long run-times or requiring a local +file. These benchmarks should be decorated with `@on_demand_benchmark` +(see [benchmarks init](./benchmarks/__init__.py)), which +sets the benchmark to only be included in a run when the `ON_DEMAND_BENCHMARKS` +environment variable is set. Examples include the CPerf and SPerf benchmark +suites for the UK Met Office NG-VAT project. + +## Benchmark environments + +We have disabled ASV's standard environment management, instead using an +environment built using the same scripts that set up the package test +environments. +This is done using ASV's plugin architecture - see +[`asv_delegated.py`](asv_delegated.py) and associated +references in [`asv.conf.json`](asv.conf.json) (`environment_type` and +`plugins`). + +(ASV is written to control the environment(s) that benchmarks are run in - +minimising external factors and also allowing it to compare between a matrix +of dependencies (each in a separate environment). We have chosen to sacrifice +these features in favour of testing each commit with its intended dependencies, +controlled by the test environment setup script(s)). diff --git a/benchmarks/_asv_delegated_abc.py b/benchmarks/_asv_delegated_abc.py new file mode 100644 index 00000000000..0546a3c6a2d --- /dev/null +++ b/benchmarks/_asv_delegated_abc.py @@ -0,0 +1,249 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""ASV plug-in providing an alternative :class:`asv.environments.Environment` subclass. + +Preps an environment via custom user scripts, then uses that as the +benchmarking environment. + +This module is intended as the generic code that can be shared between +repositories. Providing a functional benchmarking environment relies on correct +subclassing of the :class:`_DelegatedABC` class to specialise it for the repo in +question. The parent and subclass are separated into their own dedicated files, +which isolates ALL repo-specific code to a single file, thus simplifying the +templating process. + +""" + +from abc import ABC, abstractmethod +from contextlib import contextmanager, suppress +from os import environ +from pathlib import Path +import sys + +from asv.console import log +from asv.environment import Environment, EnvironmentUnavailable +from asv.repo import Repo + + +class _DelegatedABC(Environment, ABC): + """Manage a benchmark environment using custom user scripts, run at each commit. + + Ignores user input variations - ``matrix`` / ``pythons`` / + ``exclude``, since environment is being managed outside ASV. + + A vanilla :class:`asv.environment.Environment` is created for containing + the expected ASV configuration files and checked-out project. The actual + 'functional' environment is created/updated using + :meth:`_prep_env_override`, then the location is recorded via + a symlink within the ASV environment. The symlink is used as the + environment path used for any executable calls (e.g. + ``python my_script.py``). + + Intended as the generic parent class that can be shared between + repositories. 
Providing a functional benchmarking environment relies on + correct subclassing of this class to specialise it for the repo in question. + + Warnings + -------- + :class:`_DelegatedABC` is an abstract base class. It MUST ONLY be used via + subclasses implementing their own :meth:`_prep_env_override`, and also + :attr:`tool_name`, which must be unique. + + """ + + tool_name = "delegated-ABC" + """Required by ASV as a unique identifier of the environment type.""" + + DELEGATED_LINK_NAME = "delegated_env" + """The name of the symlink to the delegated environment.""" + + COMMIT_ENVS_VAR = "ASV_COMMIT_ENVS" + """Env var that instructs a dedicated environment be created per commit.""" + + def __init__(self, conf, python, requirements, tagged_env_vars): + """Get a 'delegated' environment based on the given ASV config object. + + Parameters + ---------- + conf : dict + ASV configuration object. + + python : str + Ignored - environment management is delegated. The value is always + ``DELEGATED``. + + requirements : dict (str -> str) + Ignored - environment management is delegated. The value is always + an empty dict. + + tagged_env_vars : dict (tag, key) -> value + Ignored - environment management is delegated. The value is always + an empty dict. + + Raises + ------ + EnvironmentUnavailable + The original environment or delegated environment cannot be created. + + """ + ignored = [] + if python: + ignored.append(f"{python=}") + if requirements: + ignored.append(f"{requirements=}") + if tagged_env_vars: + ignored.append(f"{tagged_env_vars=}") + message = ( + f"Ignoring ASV setting(s): {', '.join(ignored)}. Benchmark " + "environment management is delegated to third party script(s)." + ) + log.warning(message) + self._python = "DELEGATED" + self._requirements = {} + self._tagged_env_vars = {} + super().__init__( + conf, + self._python, + self._requirements, + self._tagged_env_vars, + ) + + self._path_undelegated = Path(self._path) + """Preserves the 'true' path of the environment so that self._path can + be safely modified and restored.""" + + @property + def _path_delegated(self) -> Path: + """The path of the symlink to the delegated environment.""" + return self._path_undelegated / self.DELEGATED_LINK_NAME + + @property + def _delegated_found(self) -> bool: + """Whether self._path_delegated successfully resolves to a directory.""" + resolved = None + with suppress(FileNotFoundError): + resolved = self._path_delegated.resolve(strict=True) + result = resolved is not None and resolved.is_dir() + return result + + def _symlink_to_delegated(self, delegated_env_path: Path) -> None: + """Create the symlink to the delegated environment.""" + self._path_delegated.unlink(missing_ok=True) + self._path_delegated.parent.mkdir(parents=True, exist_ok=True) + self._path_delegated.symlink_to(delegated_env_path, target_is_directory=True) + assert self._delegated_found + + def _setup(self): + """Temporarily try to set the user's active env as the delegated env. + + Environment prep will be run anyway once ASV starts checking out + commits, but this step tries to provide a usable environment (with + python, etc.) at the moment that ASV expects it. + + """ + current_env = Path(sys.executable).parents[1] + message = ( + "Temporarily using user's active environment as benchmarking " + f"environment: {current_env} . 
" + ) + try: + self._symlink_to_delegated(current_env) + _ = self.find_executable("python") + except Exception: + message = ( + f"Delegated environment {self.name} not yet set up (unable to " + "determine current environment)." + ) + self._path_delegated.unlink(missing_ok=True) + + message += "Correct environment will be set up at the first commit checkout." + log.warning(message) + + @abstractmethod + def _prep_env_override(self, env_parent_dir: Path) -> Path: + """Run aspects of :meth:`_prep_env` that vary between repos. + + This is the method that is expected to do the preparing + (:meth:`_prep_env` only performs pre- and post- steps). MUST be + overridden in any subclass environments before they will work. + + Parameters + ---------- + env_parent_dir : Path + The directory that the prepared environment should be placed in. + + Returns + ------- + Path + The path to the prepared environment. + """ + pass + + def _prep_env(self, commit_hash: str) -> None: + """Prepare the delegated environment for the given commit hash.""" + message = ( + f"Running delegated environment management for: {self.name} " + f"at commit: {commit_hash[:8]}" + ) + log.info(message) + + env_parent = Path(self._env_dir).resolve() + new_env_per_commit = self.COMMIT_ENVS_VAR in environ + if new_env_per_commit: + env_parent = env_parent / commit_hash[:8] + + delegated_env_path = self._prep_env_override(env_parent) + assert delegated_env_path.is_relative_to(env_parent) + + # Record the environment's path via a symlink within this environment. + self._symlink_to_delegated(delegated_env_path) + + message = f"Environment {self.name} updated to spec at {commit_hash[:8]}" + log.info(message) + + def checkout_project(self, repo: Repo, commit_hash: str) -> None: + """Check out the working tree of the project at given commit hash.""" + super().checkout_project(repo, commit_hash) + self._prep_env(commit_hash) + + @contextmanager + def _delegate_path(self): + """Context manager to use the delegated env path as this env's path.""" + if not self._delegated_found: + message = f"Delegated environment not found at: {self._path_delegated}" + log.error(message) + raise EnvironmentUnavailable(message) + + try: + self._path = str(self._path_delegated) + yield + finally: + self._path = str(self._path_undelegated) + + def find_executable(self, executable): + """Find an executable (e.g. python, pip) in the DELEGATED environment. + + Raises + ------ + OSError + If the executable is not found in the environment. + """ + if not self._delegated_found: + # Required during environment setup. OSError expected if executable + # not found. + raise OSError + + with self._delegate_path(): + return super().find_executable(executable) + + def run_executable(self, executable, args, **kwargs): + """Run a given executable (e.g. python, pip) in the DELEGATED environment.""" + with self._delegate_path(): + return super().run_executable(executable, args, **kwargs) + + def run(self, args, **kwargs): + # This is not a specialisation - just implementing the abstract method. 
+ log.debug(f"Running '{' '.join(args)}' in {self.name}") + return self.run_executable("python", args, **kwargs) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 00000000000..bc0f6e55e35 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,27 @@ +{ + "version": 1, + "project": "scitools-iris", + "project_url": "https://github.com/SciTools/iris", + "repo": "..", + "environment_type": "delegated", + "show_commit_url": "https://github.com/scitools/iris/commit/", + "branches": ["upstream/main"], + + "benchmark_dir": "./benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "plugins": [".asv_delegated"], + + "command_comment": [ + "The inherited setup of the Iris test environment takes care of ", + "Iris-installation too, and in the case of Iris no specialised ", + "uninstall or build commands are needed to get it working either.", + + "We do however need to install the custom benchmarks for them to be", + "usable." + ], + "install_command": [], + "uninstall_command": [], + "build_command": ["python {conf_dir}/custom_bms/install.py"] +} diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py new file mode 100644 index 00000000000..bbb4b03ea56 --- /dev/null +++ b/benchmarks/asv_delegated.py @@ -0,0 +1,152 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Repository-specific adaptation of :mod:`_asv_delegated_abc`.""" + +import ast +import enum +from os import environ +from os.path import getmtime +from pathlib import Path +import re + +from asv import util as asv_util + +from _asv_delegated_abc import _DelegatedABC + + +class Delegated(_DelegatedABC): + """Specialism of :class:`_DelegatedABC` for benchmarking this repo.""" + + tool_name = "delegated" + + def _prep_env_override(self, env_parent_dir: Path) -> Path: + """Environment preparation specialised for this repo. + + Scans the checked-out commit of Iris to work out the appropriate + preparation command, including gathering any extra information that said + command needs. + + Parameters + ---------- + env_parent_dir : Path + The directory that the prepared environment should be placed in. + + Returns + ------- + Path + The path to the prepared environment. + """ + # The project checkout. + build_dir = Path(self._build_root) / self._repo_subdir + + # Older iterations of setup.py are incompatible with setuptools>=80. + # (Most dependencies are protected by lock-files, but build + # dependencies in pyproject.toml are independent). 
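+        # Where such a setup.py is found, insert a "setuptools<80" pin into the
+        # checked-out pyproject.toml's build requirements before building the env.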
+ setup_py = build_dir / "setup.py" + pyproject = build_dir / "pyproject.toml" + if setup_py.is_file() and "setuptools.command.develop" in setup_py.read_text(): + with pyproject.open("r+") as file_write: + lines = file_write.readlines() + for i, line in enumerate(lines): + if line == "requires = [\n": + next_line = lines[i + 1] + indent = next_line[: len(next_line) - len(next_line.lstrip())] + + lines.insert(i + 1, f'{indent}"setuptools<80",\n') + break + file_write.seek(0) + file_write.writelines(lines) + + class Mode(enum.Enum): + """The scenarios where the correct env setup script is known.""" + + NOX = enum.auto() + """``PY_VER=x.xx nox --session=tests --install-only`` is supported.""" + + mode = None + + noxfile = build_dir / "noxfile.py" + if noxfile.is_file(): + # Our noxfile originally did not support `--install-only` - you + # could either run the tests, or run nothing at all. Adding + # `run_always` to `prepare_venv` enabled environment setup without + # running tests. + noxfile_tree = ast.parse(source=noxfile.read_text()) + prep_session = next( + filter( + lambda node: getattr(node, "name", "") == "prepare_venv", + ast.walk(noxfile_tree), + ) + ) + prep_session_code = ast.unparse(prep_session) + if ( + "session.run(" not in prep_session_code + and "session.run_always(" in prep_session_code + ): + mode = Mode.NOX + + match mode: + # Just NOX for now but the architecture is here for future cases. + case Mode.NOX: + # Need to determine a single Python version to run with. + req_dir = build_dir / "requirements" + lockfile_dir = req_dir / "locks" + if not lockfile_dir.is_dir(): + lockfile_dir = req_dir / "ci" / "nox.lock" + + if not lockfile_dir.is_dir(): + message = "No lockfile directory found in the expected locations." + raise FileNotFoundError(message) + + def py_ver_from_lockfiles(lockfile: Path) -> str: + pattern = re.compile(r"py(\d+)-") + search = pattern.search(lockfile.name) + assert search is not None + version = search.group(1) + return f"{version[0]}.{version[1:]}" + + python_versions = [ + py_ver_from_lockfiles(lockfile) + for lockfile in lockfile_dir.glob("*.lock") + ] + python_version = max(python_versions) + + # Construct and run the environment preparation command. + local_envs = dict(environ) + local_envs["PY_VER"] = python_version + # Prevent Nox re-using env with wrong Python version. + env_parent_dir = ( + env_parent_dir / f"nox{python_version.replace('.', '')}" + ) + env_command = [ + "nox", + f"--envdir={env_parent_dir}", + "--session=tests", + "--install-only", + "--no-error-on-external-run", + "--verbose", + ] + _ = asv_util.check_output( + env_command, + timeout=self._install_timeout, + cwd=build_dir, + env=local_envs, + ) + + env_parent_contents = list(env_parent_dir.iterdir()) + if len(env_parent_contents) != 1: + message = ( + f"{env_parent_dir} contains {len(env_parent_contents)} " + "items, expected 1. Cannot determine the environment " + "directory." + ) + raise FileNotFoundError(message) + (delegated_env_path,) = env_parent_contents + + case _: + message = "No environment setup is known for this commit of Iris." 
+ raise NotImplementedError(message) + + return delegated_env_path diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 00000000000..b8a54f71a42 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1,74 @@ +import itertools +import os + +import numpy as np + +_counter = itertools.count() + + +def parameterized(names, params): + def decorator(func): + func.param_names = names + func.params = params + return func + + return decorator + + +def requires_dask(): + try: + import dask # noqa: F401 + except ImportError as err: + raise NotImplementedError() from err + + +def requires_sparse(): + try: + import sparse # noqa: F401 + except ImportError as err: + raise NotImplementedError() from err + + +def randn(shape, frac_nan=None, chunks=None, seed=0): + rng = np.random.default_rng(seed) + if chunks is None: + x = rng.standard_normal(shape) + else: + import dask.array as da + + rng = da.random.default_rng(seed) + x = rng.standard_normal(shape, chunks=chunks) + + if frac_nan is not None: + inds = rng.choice(range(x.size), int(x.size * frac_nan)) + x.flat[inds] = np.nan + + return x + + +def randint(low, high=None, size=None, frac_minus=None, seed=0): + rng = np.random.default_rng(seed) + x = rng.integers(low, high, size) + if frac_minus is not None: + inds = rng.choice(range(x.size), int(x.size * frac_minus)) + x.flat[inds] = -1 + + return x + + +def _skip_slow(): + """ + Use this function to skip slow or highly demanding tests. + + Use it as a `Class.setup` method or a `function.setup` attribute. + + Examples + -------- + >>> from . import _skip_slow + >>> def time_something_slow(): + ... pass + ... + >>> time_something.setup = _skip_slow + """ + if os.environ.get("ASV_SKIP_SLOW", "0") == "1": + raise NotImplementedError("Skipping this test...") diff --git a/benchmarks/benchmarks/accessors.py b/benchmarks/benchmarks/accessors.py new file mode 100644 index 00000000000..259c06160ac --- /dev/null +++ b/benchmarks/benchmarks/accessors.py @@ -0,0 +1,25 @@ +import numpy as np + +import xarray as xr + +from . import parameterized + +NTIME = 365 * 30 + + +@parameterized(["calendar"], [("standard", "noleap")]) +class DateTimeAccessor: + def setup(self, calendar): + np.random.randn(NTIME) + time = xr.date_range("2000", periods=30 * 365, calendar=calendar) + data = np.ones((NTIME,)) + self.da = xr.DataArray(data, dims="time", coords={"time": time}) + + def time_dayofyear(self, calendar): + _ = self.da.time.dt.dayofyear + + def time_year(self, calendar): + _ = self.da.time.dt.year + + def time_floor(self, calendar): + _ = self.da.time.dt.floor("D") diff --git a/benchmarks/benchmarks/alignment.py b/benchmarks/benchmarks/alignment.py new file mode 100644 index 00000000000..5a6ee3fa0a6 --- /dev/null +++ b/benchmarks/benchmarks/alignment.py @@ -0,0 +1,54 @@ +import numpy as np + +import xarray as xr + +from . 
import parameterized, requires_dask + +ntime = 365 * 30 +nx = 50 +ny = 50 + +rng = np.random.default_rng(0) + + +class Align: + def setup(self, *args, **kwargs): + data = rng.standard_normal((ntime, nx, ny)) + self.ds = xr.Dataset( + {"temperature": (("time", "x", "y"), data)}, + coords={ + "time": xr.date_range("2000", periods=ntime), + "x": np.arange(nx), + "y": np.arange(ny), + }, + ) + self.year = self.ds.time.dt.year + self.idx = np.unique(rng.integers(low=0, high=ntime, size=ntime // 2)) + self.year_subset = self.year.isel(time=self.idx) + + @parameterized(["join"], [("outer", "inner", "left", "right", "exact", "override")]) + def time_already_aligned(self, join): + xr.align(self.ds, self.year, join=join) + + @parameterized(["join"], [("outer", "inner", "left", "right")]) + def time_not_aligned(self, join): + xr.align(self.ds, self.year[-100:], join=join) + + @parameterized(["join"], [("outer", "inner", "left", "right")]) + def time_not_aligned_random_integers(self, join): + xr.align(self.ds, self.year_subset, join=join) + + +class AlignCFTime(Align): + def setup(self, *args, **kwargs): + super().setup() + self.ds["time"] = xr.date_range("2000", periods=ntime, calendar="noleap") + self.year = self.ds.time.dt.year + self.year_subset = self.year.isel(time=self.idx) + + +class AlignDask(Align): + def setup(self, *args, **kwargs): + requires_dask() + super().setup() + self.ds = self.ds.chunk({"time": 100}) diff --git a/benchmarks/benchmarks/coding.py b/benchmarks/benchmarks/coding.py new file mode 100644 index 00000000000..c39555243c0 --- /dev/null +++ b/benchmarks/benchmarks/coding.py @@ -0,0 +1,18 @@ +import numpy as np + +import xarray as xr + +from . import parameterized + + +@parameterized(["calendar"], [("standard", "noleap")]) +class EncodeCFDatetime: + def setup(self, calendar): + self.units = "days since 2000-01-01" + self.dtype = np.dtype("int64") + self.times = xr.date_range( + "2000", freq="D", periods=10000, calendar=calendar + ).values + + def time_encode_cf_datetime(self, calendar): + xr.coding.times.encode_cf_datetime(self.times, self.units, calendar, self.dtype) diff --git a/benchmarks/benchmarks/combine.py b/benchmarks/benchmarks/combine.py new file mode 100644 index 00000000000..772d888306c --- /dev/null +++ b/benchmarks/benchmarks/combine.py @@ -0,0 +1,79 @@ +import numpy as np + +import xarray as xr + +from . 
import requires_dask + + +class Combine1d: + """Benchmark concatenating and merging large datasets""" + + def setup(self) -> None: + """Create 2 datasets with two different variables""" + + t_size = 8000 + t = np.arange(t_size) + data = np.random.randn(t_size) + + self.dsA0 = xr.Dataset({"A": xr.DataArray(data, coords={"T": t}, dims=("T"))}) + self.dsA1 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T"))} + ) + + def time_combine_by_coords(self) -> None: + """Also has to load and arrange t coordinate""" + datasets = [self.dsA0, self.dsA1] + + xr.combine_by_coords(datasets) + + +class Combine1dDask(Combine1d): + """Benchmark concatenating and merging large datasets""" + + def setup(self) -> None: + """Create 2 datasets with two different variables""" + requires_dask() + + t_size = 8000 + t = np.arange(t_size) + var = xr.Variable(dims=("T",), data=np.random.randn(t_size)).chunk() + + data_vars = {f"long_name_{v}": ("T", var) for v in range(500)} + + self.dsA0 = xr.Dataset(data_vars, coords={"T": t}) + self.dsA1 = xr.Dataset(data_vars, coords={"T": t + t_size}) + + +class Combine3d: + """Benchmark concatenating and merging large datasets""" + + def setup(self): + """Create 4 datasets with two different variables""" + + t_size, x_size, y_size = 50, 450, 400 + t = np.arange(t_size) + data = np.random.randn(t_size, x_size, y_size) + + self.dsA0 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} + ) + self.dsA1 = xr.Dataset( + {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} + ) + self.dsB0 = xr.Dataset( + {"B": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} + ) + self.dsB1 = xr.Dataset( + {"B": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} + ) + + def time_combine_nested(self): + datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]] + + xr.combine_nested(datasets, concat_dim=[None, "T"]) + + def time_combine_by_coords(self): + """Also has to load and arrange t coordinate""" + datasets = [self.dsA0, self.dsA1, self.dsB0, self.dsB1] + + xr.combine_by_coords(datasets) diff --git a/benchmarks/benchmarks/dataarray_missing.py b/benchmarks/benchmarks/dataarray_missing.py new file mode 100644 index 00000000000..83de65b7fe4 --- /dev/null +++ b/benchmarks/benchmarks/dataarray_missing.py @@ -0,0 +1,72 @@ +import pandas as pd + +import xarray as xr + +from . 
import parameterized, randn, requires_dask + + +def make_bench_data(shape, frac_nan, chunks): + vals = randn(shape, frac_nan) + coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} + da = xr.DataArray(vals, dims=("time", "x", "y"), coords=coords) + + if chunks is not None: + da = da.chunk(chunks) + + return da + + +class DataArrayMissingInterpolateNA: + def setup(self, shape, chunks, limit): + if chunks is not None: + requires_dask() + self.da = make_bench_data(shape, 0.1, chunks) + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_interpolate_na(self, shape, chunks, limit): + actual = self.da.interpolate_na(dim="time", method="linear", limit=limit) + + if chunks is not None: + actual = actual.compute() + + +class DataArrayMissingBottleneck: + def setup(self, shape, chunks, limit): + if chunks is not None: + requires_dask() + self.da = make_bench_data(shape, 0.1, chunks) + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_ffill(self, shape, chunks, limit): + actual = self.da.ffill(dim="time", limit=limit) + + if chunks is not None: + actual = actual.compute() + + @parameterized( + ["shape", "chunks", "limit"], + ( + [(365, 75, 75)], + [None, {"x": 25, "y": 25}], + [None, 3], + ), + ) + def time_bfill(self, shape, chunks, limit): + actual = self.da.bfill(dim="time", limit=limit) + + if chunks is not None: + actual = actual.compute() diff --git a/benchmarks/benchmarks/dataset.py b/benchmarks/benchmarks/dataset.py new file mode 100644 index 00000000000..d8a6d6df9d8 --- /dev/null +++ b/benchmarks/benchmarks/dataset.py @@ -0,0 +1,32 @@ +import numpy as np + +from xarray import Dataset + +from . import requires_dask + + +class DatasetBinaryOp: + def setup(self): + self.ds = Dataset( + { + "a": (("x", "y"), np.ones((300, 400))), + "b": (("x", "y"), np.ones((300, 400))), + } + ) + self.mean = self.ds.mean() + self.std = self.ds.std() + + def time_normalize(self): + (self.ds - self.mean) / self.std + + +class DatasetChunk: + def setup(self): + requires_dask() + self.ds = Dataset() + array = np.ones(1000) + for i in range(250): + self.ds[f"var{i}"] = ("x", array) + + def time_chunk(self): + self.ds.chunk(x=(1,) * 1000) diff --git a/benchmarks/benchmarks/dataset_io.py b/benchmarks/benchmarks/dataset_io.py new file mode 100644 index 00000000000..b8afabe802e --- /dev/null +++ b/benchmarks/benchmarks/dataset_io.py @@ -0,0 +1,755 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass + +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import _skip_slow, parameterized, randint, randn, requires_dask + +try: + import dask + import dask.multiprocessing +except ImportError: + pass + +os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" + +_ENGINES = tuple(xr.backends.list_engines().keys() - {"store"}) + + +class IOSingleNetCDF: + """ + A few examples that benchmark reading/writing a single netCDF file with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_ds(self): + # single Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + times = pd.date_range("1970-01-01", periods=self.nt, freq="D") + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + self.ds["foo"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + self.ds["bar"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + self.ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + self.ds.attrs = {"history": "created for xarray benchmarking"} + + self.oinds = { + "time": randint(0, self.nt, 120), + "lon": randint(0, self.nx, 20), + "lat": randint(0, self.ny, 10), + } + self.vinds = { + "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), + "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), + "lat": slice(3, 20), + } + + +class IOWriteSingleNetCDF3(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.format = "NETCDF3_64BIT" + self.make_ds() + + def time_write_dataset_netcdf4(self): + self.ds.to_netcdf("test_netcdf4_write.nc", engine="netcdf4", format=self.format) + + def time_write_dataset_scipy(self): + self.ds.to_netcdf("test_scipy_write.nc", engine="scipy", format=self.format) + + +class IOReadSingleNetCDF4(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.make_ds() + + self.filepath = "test_single_file.nc4.nc" + self.format = "NETCDF4" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_netcdf4(self): + xr.open_dataset(self.filepath, engine="netcdf4").load() + + def time_orthogonal_indexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4") + ds = ds.isel(**self.oinds).load() + + def time_vectorized_indexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4") + ds = ds.isel(**self.vinds).load() + + +class IOReadSingleNetCDF3(IOReadSingleNetCDF4): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + self.make_ds() + + self.filepath = "test_single_file.nc3.nc" + self.format = "NETCDF3_64BIT" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_scipy(self): + xr.open_dataset(self.filepath, engine="scipy").load() + + def time_orthogonal_indexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy") + ds = ds.isel(**self.oinds).load() + + def time_vectorized_indexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy") + ds = ds.isel(**self.vinds).load() + + +class IOReadSingleNetCDF4Dask(IOSingleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + + self.filepath = "test_single_file.nc4.nc" + self.format = "NETCDF4" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_netcdf4_with_block_chunks(self): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_block_chunks_oindexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) + ds = ds.isel(**self.oinds).load() + + def time_load_dataset_netcdf4_with_block_chunks_vindexing(self): + ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) + ds = ds.isel(**self.vinds).load() + + def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks(self): + xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.time_chunks).load() + + def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="netcdf4", chunks=self.time_chunks + ).load() + + +class IOReadSingleNetCDF3Dask(IOReadSingleNetCDF4Dask): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + self.make_ds() + + self.filepath = "test_single_file.nc3.nc" + self.format = "NETCDF3_64BIT" + self.ds.to_netcdf(self.filepath, format=self.format) + + def time_load_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="scipy", chunks=self.block_chunks + ).load() + + def time_load_dataset_scipy_with_block_chunks_oindexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) + ds = ds.isel(**self.oinds).load() + + def time_load_dataset_scipy_with_block_chunks_vindexing(self): + ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) + ds = ds.isel(**self.vinds).load() + + def time_load_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_dataset( + self.filepath, engine="scipy", chunks=self.time_chunks + ).load() + + +class IOMultipleNetCDF: + """ + A few examples that benchmark reading/writing multiple netCDF files with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_ds(self, nfiles=10): + # multiple Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + self.nfiles = nfiles + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + self.time_vars = np.split( + pd.date_range("1970-01-01", periods=self.nt, freq="D"), self.nfiles + ) + + self.ds_list = [] + self.filenames_list = [] + for i, times in enumerate(self.time_vars): + ds = xr.Dataset() + nt = len(times) + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + ds["foo"] = xr.DataArray( + randn((nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + ds["bar"] = xr.DataArray( + randn((nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + ds.attrs = {"history": "created for xarray benchmarking"} + + self.ds_list.append(ds) + self.filenames_list.append(f"test_netcdf_{i}.nc") + + +class IOWriteMultipleNetCDF3(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + self.make_ds() + self.format = "NETCDF3_64BIT" + + def time_write_dataset_netcdf4(self): + xr.save_mfdataset( + self.ds_list, self.filenames_list, engine="netcdf4", format=self.format + ) + + def time_write_dataset_scipy(self): + xr.save_mfdataset( + self.ds_list, self.filenames_list, engine="scipy", format=self.format + ) + + +class IOReadMultipleNetCDF4(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF4" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_netcdf4(self): + xr.open_mfdataset(self.filenames_list, engine="netcdf4").load() + + def time_open_dataset_netcdf4(self): + xr.open_mfdataset(self.filenames_list, engine="netcdf4") + + +class IOReadMultipleNetCDF3(IOReadMultipleNetCDF4): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF3_64BIT" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_scipy(self): + xr.open_mfdataset(self.filenames_list, engine="scipy").load() + + def time_open_dataset_scipy(self): + xr.open_mfdataset(self.filenames_list, engine="scipy") + + +class IOReadMultipleNetCDF4Dask(IOMultipleNetCDF): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF4" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_netcdf4_with_block_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ).load() + + def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ).load() + + def time_open_dataset_netcdf4_with_block_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ) + + def time_open_dataset_netcdf4_with_block_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.block_chunks + ) + + def time_open_dataset_netcdf4_with_time_chunks(self): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ) + + def time_open_dataset_netcdf4_with_time_chunks_multiprocessing(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="netcdf4", chunks=self.time_chunks + ) + + +class IOReadMultipleNetCDF3Dask(IOReadMultipleNetCDF4Dask): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
+ _skip_slow() + + requires_dask() + + self.make_ds() + self.format = "NETCDF3_64BIT" + xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) + + def time_load_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.block_chunks + ).load() + + def time_load_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.time_chunks + ).load() + + def time_open_dataset_scipy_with_block_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.block_chunks + ) + + def time_open_dataset_scipy_with_time_chunks(self): + with dask.config.set(scheduler="multiprocessing"): + xr.open_mfdataset( + self.filenames_list, engine="scipy", chunks=self.time_chunks + ) + + +def create_delayed_write(): + import dask.array as da + + vals = da.random.random(300, chunks=(1,)) + ds = xr.Dataset({"vals": (["a"], vals)}) + return ds.to_netcdf("file.nc", engine="netcdf4", compute=False) + + +class IONestedDataTree: + """ + A few examples that benchmark reading/writing a heavily nested netCDF datatree with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_datatree(self, nchildren=10): + # multiple Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + self.nchildren = nchildren + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + times = pd.date_range("1970-01-01", periods=self.nt, freq="D") + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + self.ds["foo"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + self.ds["bar"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + self.ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + self.ds.attrs = {"history": "created for xarray benchmarking"} + + self.oinds = { + "time": randint(0, self.nt, 120), + "lon": randint(0, self.nx, 20), + "lat": randint(0, self.ny, 10), + } + self.vinds = { + "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), + "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), + "lat": slice(3, 20), + } + root = {f"group_{group}": self.ds for group in range(self.nchildren)} + nested_tree1 = { + f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren) + } + nested_tree2 = { + f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset( + name="a" + ) + for group in range(self.nchildren) + } + nested_tree3 = { + f"group_{group}/subgroup_2/sub-subgroup_1": self.ds + for group in range(self.nchildren) + } + 
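+ # Union the flat root groups with the nested subgroup entries; the
+ # '/'-separated keys tell DataTree.from_dict where each dataset sits in
+ # the tree hierarchy.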
dtree = root | nested_tree1 | nested_tree2 | nested_tree3 + self.dtree = xr.DataTree.from_dict(dtree) + + +class IOReadDataTreeNetCDF4(IONestedDataTree): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_datatree() + self.format = "NETCDF4" + self.filepath = "datatree.nc4.nc" + dtree = self.dtree + dtree.to_netcdf(filepath=self.filepath) + + def time_load_datatree_netcdf4(self): + xr.open_datatree(self.filepath, engine="netcdf4").load() + + def time_open_datatree_netcdf4(self): + xr.open_datatree(self.filepath, engine="netcdf4") + + +class IOWriteNetCDFDask: + timeout = 60 + repeat = 1 + number = 5 + + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.write = create_delayed_write() + + def time_write(self): + self.write.compute() + + +class IOWriteNetCDFDaskDistributed: + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + try: + import distributed + except ImportError as err: + raise NotImplementedError() from err + + self.client = distributed.Client() + self.write = create_delayed_write() + + def cleanup(self): + self.client.shutdown() + + def time_write(self): + self.write.compute() + + +class IOReadSingleFile(IOSingleNetCDF): + def setup(self, *args, **kwargs): + self.make_ds() + + self.filepaths = {} + for engine in _ENGINES: + self.filepaths[engine] = f"test_single_file_with_{engine}.nc" + self.ds.to_netcdf(self.filepaths[engine], engine=engine) + + @parameterized(["engine", "chunks"], (_ENGINES, [None, {}])) + def time_read_dataset(self, engine, chunks): + xr.open_dataset(self.filepaths[engine], engine=engine, chunks=chunks) + + +class IOReadCustomEngine: + def setup(self, *args, **kwargs): + """ + The custom backend does the bare minimum to be considered a lazy backend. But + the data in it is still in memory so slow file reading shouldn't affect the + results. + """ + requires_dask() + + @dataclass + class PerformanceBackendArray(xr.backends.BackendArray): + filename_or_obj: str | os.PathLike | None + shape: tuple[int, ...] + dtype: np.dtype + lock: xr.backends.locks.SerializableLock + + def __getitem__(self, key: tuple): + return xr.core.indexing.explicit_indexing_adapter( + key, + self.shape, + xr.core.indexing.IndexingSupport.BASIC, + self._raw_indexing_method, + ) + + def _raw_indexing_method(self, key: tuple): + raise NotImplementedError + + @dataclass + class PerformanceStore(xr.backends.common.AbstractWritableDataStore): + manager: xr.backends.CachingFileManager + mode: str | None = None + lock: xr.backends.locks.SerializableLock | None = None + autoclose: bool = False + + def __post_init__(self): + self.filename = self.manager._args[0] + + @classmethod + def open( + cls, + filename: str | os.PathLike | None, + mode: str = "r", + lock: xr.backends.locks.SerializableLock | None = None, + autoclose: bool = False, + ): + locker = lock or xr.backends.locks.SerializableLock() + + manager = xr.backends.CachingFileManager( + xr.backends.DummyFileManager, + filename, + mode=mode, + ) + return cls(manager, mode=mode, lock=locker, autoclose=autoclose) + + def load(self) -> tuple: + """ + Load a bunch of test data quickly. + + Normally this method would've opened a file and parsed it. 
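+ Here it simply fabricates lazy variables backed by
+ PerformanceBackendArray, so only xarray's own wrapping and decoding
+ overhead is measured.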
+ """ + n_variables = 2000 + + # Important to have a shape and dtype for lazy loading. + shape = (1000,) + dtype = np.dtype(int) + variables = { + f"long_variable_name_{v}": xr.Variable( + data=PerformanceBackendArray( + self.filename, shape, dtype, self.lock + ), + dims=("time",), + fastpath=True, + ) + for v in range(n_variables) + } + attributes = {} + + return variables, attributes + + class PerformanceBackend(xr.backends.BackendEntrypoint): + def open_dataset( + self, + filename_or_obj: str | os.PathLike | None, + drop_variables: tuple[str, ...] | None = None, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + use_cftime=None, + decode_timedelta=None, + lock=None, + **kwargs, + ) -> xr.Dataset: + filename_or_obj = xr.backends.common._normalize_path(filename_or_obj) + store = PerformanceStore.open(filename_or_obj, lock=lock) + + store_entrypoint = xr.backends.store.StoreBackendEntrypoint() + + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds + + self.engine = PerformanceBackend + + @parameterized(["chunks"], ([None, {}, {"time": 10}])) + def time_open_dataset(self, chunks): + """ + Time how fast xr.open_dataset is without the slow data reading part. + Test with and without dask. + """ + xr.open_dataset(None, engine=self.engine, chunks=chunks) diff --git a/benchmarks/benchmarks/datatree.py b/benchmarks/benchmarks/datatree.py new file mode 100644 index 00000000000..9f1774f60ac --- /dev/null +++ b/benchmarks/benchmarks/datatree.py @@ -0,0 +1,15 @@ +import xarray as xr +from xarray.core.datatree import DataTree + + +class Datatree: + def setup(self): + run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})}) + self.d_few = {"run1": run1} + self.d_many = {f"run{i}": xr.Dataset({"a": 1}) for i in range(100)} + + def time_from_dict_few(self): + DataTree.from_dict(self.d_few) + + def time_from_dict_many(self): + DataTree.from_dict(self.d_many) diff --git a/benchmarks/benchmarks/groupby.py b/benchmarks/benchmarks/groupby.py new file mode 100644 index 00000000000..681fd6ed734 --- /dev/null +++ b/benchmarks/benchmarks/groupby.py @@ -0,0 +1,191 @@ +# import flox to avoid the cost of first import +import cftime +import flox.xarray # noqa: F401 +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import _skip_slow, parameterized, requires_dask + + +class GroupBy: + def setup(self, *args, **kwargs): + self.n = 100 + self.ds1d = xr.Dataset( + { + "a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]), + "b": xr.DataArray(np.arange(2 * self.n)), + "c": xr.DataArray(np.arange(2 * self.n)), + } + ) + self.ds2d = self.ds1d.expand_dims(z=10).copy() + self.ds1d_mean = self.ds1d.groupby("b").mean() + self.ds2d_mean = self.ds2d.groupby("b").mean() + + @parameterized(["ndim"], [(1, 2)]) + def time_init(self, ndim): + getattr(self, f"ds{ndim}d").groupby("b") + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_small_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.groupby("a"), method)().compute() + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_large_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.groupby("b"), method)().compute() + + def time_binary_op_1d(self): + (self.ds1d.groupby("b") - self.ds1d_mean).compute() + + def time_binary_op_2d(self): + (self.ds2d.groupby("b") - self.ds2d_mean).compute() + + def peakmem_binary_op_1d(self): + (self.ds1d.groupby("b") - self.ds1d_mean).compute() + + def peakmem_binary_op_2d(self): + (self.ds2d.groupby("b") - self.ds2d_mean).compute() + + +class GroupByDask(GroupBy): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + + self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)) + self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50}) + self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)) + self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5}) + self.ds1d_mean = self.ds1d.groupby("b").mean().compute() + self.ds2d_mean = self.ds2d.groupby("b").mean().compute() + + +# TODO: These don't work now because we are calling `.compute` explicitly. 
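+# The DataFrame-based subclasses below convert the 1-D dataset to a (dask)
+# DataFrame, so the inherited 2-D benchmarks are overridden to raise
+# NotImplementedError rather than run on data that no longer exists.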
+class GroupByPandasDataFrame(GroupBy): + """Run groupby tests using pandas DataFrame.""" + + def setup(self, *args, **kwargs): + # Skip testing in CI as it won't ever change in a commit: + _skip_slow() + + super().setup(**kwargs) + self.ds1d = self.ds1d.to_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean() + + def time_binary_op_2d(self): + raise NotImplementedError + + def peakmem_binary_op_2d(self): + raise NotImplementedError + + +class GroupByDaskDataFrame(GroupBy): + """Run groupby tests using dask DataFrame.""" + + def setup(self, *args, **kwargs): + # Skip testing in CI as it won't ever change in a commit: + _skip_slow() + + requires_dask() + super().setup(**kwargs) + self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dask_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean().compute() + + def time_binary_op_2d(self): + raise NotImplementedError + + def peakmem_binary_op_2d(self): + raise NotImplementedError + + +class Resample: + def setup(self, *args, **kwargs): + self.ds1d = xr.Dataset( + { + "b": ("time", np.arange(365.0 * 24)), + }, + coords={"time": pd.date_range("2001-01-01", freq="h", periods=365 * 24)}, + ) + self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.resample(time="48h").mean() + self.ds2d_mean = self.ds2d.resample(time="48h").mean() + + @parameterized(["ndim"], [(1, 2)]) + def time_init(self, ndim): + getattr(self, f"ds{ndim}d").resample(time="D") + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_small_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.resample(time="3ME"), method)().compute() + + @parameterized( + ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] + ) + def time_agg_large_num_groups(self, method, ndim, use_flox): + ds = getattr(self, f"ds{ndim}d") + with xr.set_options(use_flox=use_flox): + getattr(ds.resample(time="48h"), method)().compute() + + +class ResampleDask(Resample): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.ds1d = self.ds1d.chunk({"time": 50}) + self.ds2d = self.ds2d.chunk({"time": 50, "z": 4}) + + +class ResampleCFTime(Resample): + def setup(self, *args, **kwargs): + self.ds1d = xr.Dataset( + { + "b": ("time", np.arange(365.0 * 24)), + }, + coords={ + "time": xr.date_range( + "2001-01-01", freq="h", periods=365 * 24, calendar="noleap" + ) + }, + ) + self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.resample(time="48h").mean() + self.ds2d_mean = self.ds2d.resample(time="48h").mean() + + +@parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]]) +class GroupByLongTime: + def setup(self, use_cftime, use_flox): + arr = np.random.randn(10, 10, 365 * 30) + time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime) + + # GH9426 - deep-copying CFTime object arrays is weirdly slow + asda = xr.DataArray(time) + labeled_time = [] + for year, month in zip(asda.dt.year, asda.dt.month, strict=True): + labeled_time.append(cftime.datetime(year, month, 1)) + + self.da = xr.DataArray( + arr, + dims=("y", "x", "time"), + coords={"time": time, "time2": ("time", labeled_time)}, + ) + + def time_setup(self, use_cftime, use_flox): + self.da.groupby("time.month") + + def time_mean(self, use_cftime, use_flox): + with xr.set_options(use_flox=use_flox): + self.da.groupby("time.year").mean() diff --git a/benchmarks/benchmarks/import.py b/benchmarks/benchmarks/import.py new file 
mode 100644 index 00000000000..f9d0bcc336b --- /dev/null +++ b/benchmarks/benchmarks/import.py @@ -0,0 +1,18 @@ +class Import: + """Benchmark importing xarray""" + + def timeraw_import_xarray(self): + return "import xarray" + + def timeraw_import_xarray_plot(self): + return "import xarray.plot" + + def timeraw_import_xarray_backends(self): + return """ + from xarray.backends import list_engines + list_engines() + """ + + def timeraw_import_xarray_only(self): + # import numpy and pandas in the setup stage + return "import xarray", "import numpy, pandas" diff --git a/benchmarks/benchmarks/indexing.py b/benchmarks/benchmarks/indexing.py new file mode 100644 index 00000000000..50bb8a5ee99 --- /dev/null +++ b/benchmarks/benchmarks/indexing.py @@ -0,0 +1,201 @@ +import os + +import numpy as np +import pandas as pd + +import xarray as xr + +from . import parameterized, randint, randn, requires_dask + +nx = 2000 +ny = 1000 +nt = 500 + +basic_indexes = { + "1scalar": {"x": 0}, + "1slice": {"x": slice(0, 3)}, + "1slice-1scalar": {"x": 0, "y": slice(None, None, 3)}, + "2slicess-1scalar": {"x": slice(3, -3, 3), "y": 1, "t": slice(None, -3, 3)}, +} + +basic_assignment_values = { + "1scalar": 0, + "1slice": xr.DataArray(randn((3, ny), frac_nan=0.1), dims=["x", "y"]), + "1slice-1scalar": xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=["y"]), + "2slicess-1scalar": xr.DataArray( + randn(np.empty(nx)[slice(3, -3, 3)].size, frac_nan=0.1), dims=["x"] + ), +} + +outer_indexes = { + "1d": {"x": randint(0, nx, 400)}, + "2d": {"x": randint(0, nx, 500), "y": randint(0, ny, 400)}, + "2d-1scalar": {"x": randint(0, nx, 100), "y": 1, "t": randint(0, nt, 400)}, +} + +outer_assignment_values = { + "1d": xr.DataArray(randn((400, ny), frac_nan=0.1), dims=["x", "y"]), + "2d": xr.DataArray(randn((500, 400), frac_nan=0.1), dims=["x", "y"]), + "2d-1scalar": xr.DataArray(randn(100, frac_nan=0.1), dims=["x"]), +} + + +def make_vectorized_indexes(n_index): + return { + "1-1d": {"x": xr.DataArray(randint(0, nx, n_index), dims="a")}, + "2-1d": { + "x": xr.DataArray(randint(0, nx, n_index), dims="a"), + "y": xr.DataArray(randint(0, ny, n_index), dims="a"), + }, + "3-2d": { + "x": xr.DataArray( + randint(0, nx, n_index).reshape(n_index // 100, 100), dims=["a", "b"] + ), + "y": xr.DataArray( + randint(0, ny, n_index).reshape(n_index // 100, 100), dims=["a", "b"] + ), + "t": xr.DataArray( + randint(0, nt, n_index).reshape(n_index // 100, 100), dims=["a", "b"] + ), + }, + } + + +vectorized_indexes = make_vectorized_indexes(400) +big_vectorized_indexes = make_vectorized_indexes(400_000) + +vectorized_assignment_values = { + "1-1d": xr.DataArray(randn((400, ny)), dims=["a", "y"], coords={"a": randn(400)}), + "2-1d": xr.DataArray(randn(400), dims=["a"], coords={"a": randn(400)}), + "3-2d": xr.DataArray( + randn((4, 100)), dims=["a", "b"], coords={"a": randn(4), "b": randn(100)} + ), +} + + +class Base: + def setup(self, key): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn((nx, ny), frac_nan=0.1)), + "var2": (("x", "t"), randn((nx, nt))), + "var3": (("t",), randn(nt)), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + # Benchmark how indexing is slowed down by adding many scalar variable + # to the dataset + # https://github.com/pydata/xarray/pull/9003 + self.ds_large = self.ds.merge({f"extra_var{i}": i for i in range(400)}) + + +class Indexing(Base): + @parameterized(["key"], 
[list(basic_indexes.keys())]) + def time_indexing_basic(self, key): + self.ds.isel(**basic_indexes[key]).load() + + @parameterized(["key"], [list(outer_indexes.keys())]) + def time_indexing_outer(self, key): + self.ds.isel(**outer_indexes[key]).load() + + @parameterized(["key"], [list(vectorized_indexes.keys())]) + def time_indexing_vectorized(self, key): + self.ds.isel(**vectorized_indexes[key]).load() + + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic_ds_large(self, key): + # https://github.com/pydata/xarray/pull/9003 + self.ds_large.isel(**basic_indexes[key]).load() + + +class IndexingOnly(Base): + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic(self, key): + self.ds.isel(**basic_indexes[key]) + + @parameterized(["key"], [list(outer_indexes.keys())]) + def time_indexing_outer(self, key): + self.ds.isel(**outer_indexes[key]) + + @parameterized(["key"], [list(big_vectorized_indexes.keys())]) + def time_indexing_big_vectorized(self, key): + self.ds.isel(**big_vectorized_indexes[key]) + + +class Assignment(Base): + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_assignment_basic(self, key): + ind = basic_indexes[key] + val = basic_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + @parameterized(["key"], [list(outer_indexes.keys())]) + def time_assignment_outer(self, key): + ind = outer_indexes[key] + val = outer_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + @parameterized(["key"], [list(vectorized_indexes.keys())]) + def time_assignment_vectorized(self, key): + ind = vectorized_indexes[key] + val = vectorized_assignment_values[key] + self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val + + +class IndexingDask(Indexing): + def setup(self, key): + requires_dask() + super().setup(key) + self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + + +class BooleanIndexing: + # https://github.com/pydata/xarray/issues/2227 + def setup(self): + self.ds = xr.Dataset( + {"a": ("time", np.arange(10_000_000))}, + coords={"time": np.arange(10_000_000)}, + ) + self.time_filter = self.ds.time > 50_000 + + def time_indexing(self): + self.ds.isel(time=self.time_filter) + + +class HugeAxisSmallSliceIndexing: + # https://github.com/pydata/xarray/pull/4560 + def setup(self): + self.filepath = "test_indexing_huge_axis_small_slice.nc" + if not os.path.isfile(self.filepath): + xr.Dataset( + {"a": ("x", np.arange(10_000_000))}, + coords={"x": np.arange(10_000_000)}, + ).to_netcdf(self.filepath, format="NETCDF4") + + self.ds = xr.open_dataset(self.filepath) + + def time_indexing(self): + self.ds.isel(x=slice(100)) + + def cleanup(self): + self.ds.close() + + +class AssignmentOptimized: + # https://github.com/pydata/xarray/pull/7382 + def setup(self): + self.ds = xr.Dataset(coords={"x": np.arange(500_000)}) + self.da = xr.DataArray(np.arange(500_000), dims="x") + + def time_assign_no_reindex(self): + # assign with non-indexed DataArray of same dimension size + self.ds.assign(foo=self.da) + + def time_assign_identical_indexes(self): + # fastpath index comparison (same index object) + self.ds.assign(foo=self.ds.x) diff --git a/benchmarks/benchmarks/interp.py b/benchmarks/benchmarks/interp.py new file mode 100644 index 00000000000..ca1d0a2dd89 --- /dev/null +++ b/benchmarks/benchmarks/interp.py @@ -0,0 +1,65 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import parameterized, randn, requires_dask + +nx = 1500 +ny = 1000 +nt = 500 + +randn_xy = randn((nx, ny), frac_nan=0.1) +randn_xt = randn((nx, nt)) +randn_t = randn((nt,)) + +new_x_short = np.linspace(0.3 * nx, 0.7 * nx, 100) +new_x_long = np.linspace(0.3 * nx, 0.7 * nx, 500) +new_y_long = np.linspace(0.1, 0.9, 500) + + +class Interpolation: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + "var4": (("z",), np.array(["text"])), + "var5": (("k",), np.array(["a", "b", "c"])), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + "z": np.array([1]), + "k": np.linspace(0, nx, 3), + }, + ) + + @parameterized(["method", "is_short"], (["linear", "cubic"], [True, False])) + def time_interpolation_numeric_1d(self, method, is_short): + new_x = new_x_short if is_short else new_x_long + self.ds.interp(x=new_x, method=method).compute() + + @parameterized(["method"], (["linear", "nearest"])) + def time_interpolation_numeric_2d(self, method): + self.ds.interp(x=new_x_long, y=new_y_long, method=method).compute() + + @parameterized(["is_short"], ([True, False])) + def time_interpolation_string_scalar(self, is_short): + new_z = new_x_short if is_short else new_x_long + self.ds.interp(z=new_z).compute() + + @parameterized(["is_short"], ([True, False])) + def time_interpolation_string_1d(self, is_short): + new_k = new_x_short if is_short else new_x_long + self.ds.interp(k=new_k).compute() + + +class InterpolationDask(Interpolation): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.ds = self.ds.chunk({"t": 50}) diff --git a/benchmarks/benchmarks/merge.py b/benchmarks/benchmarks/merge.py new file mode 100644 index 00000000000..6c8c1e9da90 --- /dev/null +++ b/benchmarks/benchmarks/merge.py @@ -0,0 +1,77 @@ +import numpy as np + +import xarray as xr + + +class DatasetAddVariable: + param_names = ["existing_elements"] + params = [[0, 10, 100, 1000]] + + def setup(self, existing_elements): + self.datasets = {} + # Dictionary insertion is fast(er) than xarray.Dataset insertion + d = {} + for i in range(existing_elements): + d[f"var{i}"] = i + self.dataset = xr.merge([d]) + + d = {f"set_2_{i}": i for i in range(existing_elements)} + self.dataset2 = xr.merge([d]) + + def time_variable_insertion(self, existing_elements): + dataset = self.dataset + dataset["new_var"] = 0 + + def time_merge_two_datasets(self, existing_elements): + xr.merge([self.dataset, self.dataset2]) + + +class DatasetCreation: + # The idea here is to time how long it takes to go from numpy + # and python data types, to a full dataset + # See discussion + # https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344 + param_names = ["strategy", "count"] + params = [ + ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"], + [0, 1, 10, 100, 1000], + ] + + def setup(self, strategy, count): + data = np.array(["0", "b"], dtype=str) + self.dataset_coords = dict(time=np.array([0, 1])) + self.dataset_attrs = dict(description="Test data") + attrs = dict(units="Celsius") + if strategy == "dict_of_DataArrays": + + def create_data_vars(): + return { + f"long_variable_name_{i}": xr.DataArray( + data=data, dims=("time"), attrs=attrs + ) + for i in range(count) + } + + elif strategy == "dict_of_Variables": + + def create_data_vars(): + return { + f"long_variable_name_{i}": 
xr.Variable("time", data, attrs=attrs) + for i in range(count) + } + + elif strategy == "dict_of_Tuples": + + def create_data_vars(): + return { + f"long_variable_name_{i}": ("time", data, attrs) + for i in range(count) + } + + self.create_data_vars = create_data_vars + + def time_dataset_creation(self, strategy, count): + data_vars = self.create_data_vars() + xr.Dataset( + data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs + ) diff --git a/benchmarks/benchmarks/pandas.py b/benchmarks/benchmarks/pandas.py new file mode 100644 index 00000000000..ebe61081916 --- /dev/null +++ b/benchmarks/benchmarks/pandas.py @@ -0,0 +1,64 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . import parameterized, requires_dask + + +class MultiIndexSeries: + def setup(self, dtype, subset): + data = np.random.rand(100000).astype(dtype) + index = pd.MultiIndex.from_product( + [ + list("abcdefhijk"), + list("abcdefhijk"), + pd.date_range(start="2000-01-01", periods=1000, freq="D"), + ] + ) + series = pd.Series(data, index) + if subset: + series = series[::3] + self.series = series + + @parameterized(["dtype", "subset"], ([int, float], [True, False])) + def time_from_series(self, dtype, subset): + xr.DataArray.from_series(self.series) + + +class ToDataFrame: + def setup(self, *args, **kwargs): + xp = kwargs.get("xp", np) + nvars = kwargs.get("nvars", 1) + random_kws = kwargs.get("random_kws", {}) + method = kwargs.get("method", "to_dataframe") + + dim1 = 10_000 + dim2 = 10_000 + + var = xr.Variable( + dims=("dim1", "dim2"), data=xp.random.random((dim1, dim2), **random_kws) + ) + data_vars = {f"long_name_{v}": (("dim1", "dim2"), var) for v in range(nvars)} + + ds = xr.Dataset( + data_vars, coords={"dim1": np.arange(0, dim1), "dim2": np.arange(0, dim2)} + ) + self.to_frame = getattr(ds, method) + + def time_to_dataframe(self): + self.to_frame() + + def peakmem_to_dataframe(self): + self.to_frame() + + +class ToDataFrameDask(ToDataFrame): + def setup(self, *args, **kwargs): + requires_dask() + + import dask.array as da + + super().setup( + xp=da, random_kws=dict(chunks=5000), method="to_dask_dataframe", nvars=500 + ) diff --git a/benchmarks/benchmarks/polyfit.py b/benchmarks/benchmarks/polyfit.py new file mode 100644 index 00000000000..429ffa19baa --- /dev/null +++ b/benchmarks/benchmarks/polyfit.py @@ -0,0 +1,38 @@ +import numpy as np + +import xarray as xr + +from . import parameterized, randn, requires_dask + +NDEGS = (2, 5, 20) +NX = (10**2, 10**6) + + +class Polyval: + def setup(self, *args, **kwargs): + self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} + self.coeffs = { + ndeg: xr.DataArray( + randn((ndeg,)), dims="degree", coords={"degree": np.arange(ndeg)} + ) + for ndeg in NDEGS + } + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def time_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def peakmem_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + +class PolyvalDask(Polyval): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(*args, **kwargs) + self.xs = {k: v.chunk({"x": 10000}) for k, v in self.xs.items()} diff --git a/benchmarks/benchmarks/reindexing.py b/benchmarks/benchmarks/reindexing.py new file mode 100644 index 00000000000..61e6b2213f3 --- /dev/null +++ b/benchmarks/benchmarks/reindexing.py @@ -0,0 +1,52 @@ +import numpy as np + +import xarray as xr + +from . 
import requires_dask + +ntime = 500 +nx = 50 +ny = 50 + + +class Reindex: + def setup(self): + data = np.random.default_rng(0).random((ntime, nx, ny)) + self.ds = xr.Dataset( + {"temperature": (("time", "x", "y"), data)}, + coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)}, + ) + + def time_1d_coarse(self): + self.ds.reindex(time=np.arange(0, ntime, 5)).load() + + def time_1d_fine_all_found(self): + self.ds.reindex(time=np.arange(0, ntime, 0.5), method="nearest").load() + + def time_1d_fine_some_missing(self): + self.ds.reindex( + time=np.arange(0, ntime, 0.5), method="nearest", tolerance=0.1 + ).load() + + def time_2d_coarse(self): + self.ds.reindex(x=np.arange(0, nx, 2), y=np.arange(0, ny, 2)).load() + + def time_2d_fine_all_found(self): + self.ds.reindex( + x=np.arange(0, nx, 0.5), y=np.arange(0, ny, 0.5), method="nearest" + ).load() + + def time_2d_fine_some_missing(self): + self.ds.reindex( + x=np.arange(0, nx, 0.5), + y=np.arange(0, ny, 0.5), + method="nearest", + tolerance=0.1, + ).load() + + +class ReindexDask(Reindex): + def setup(self): + requires_dask() + super().setup() + self.ds = self.ds.chunk({"time": 100}) diff --git a/benchmarks/benchmarks/renaming.py b/benchmarks/benchmarks/renaming.py new file mode 100644 index 00000000000..3ade5d8df70 --- /dev/null +++ b/benchmarks/benchmarks/renaming.py @@ -0,0 +1,27 @@ +import numpy as np + +import xarray as xr + + +class SwapDims: + param_names = ["size"] + params = [[int(1e3), int(1e5), int(1e7)]] + + def setup(self, size: int) -> None: + self.ds = xr.Dataset( + {"a": (("x", "t"), np.ones((size, 2)))}, + coords={ + "x": np.arange(size), + "y": np.arange(size), + "z": np.arange(size), + "x2": ("x", np.arange(size)), + "y2": ("y", np.arange(size)), + "z2": ("z", np.arange(size)), + }, + ) + + def time_swap_dims(self, size: int) -> None: + self.ds.swap_dims({"x": "xn", "y": "yn", "z": "zn"}) + + def time_swap_dims_newindex(self, size: int) -> None: + self.ds.swap_dims({"x": "x2", "y": "y2", "z": "z2"}) diff --git a/benchmarks/benchmarks/repr.py b/benchmarks/benchmarks/repr.py new file mode 100644 index 00000000000..68a082fcc4f --- /dev/null +++ b/benchmarks/benchmarks/repr.py @@ -0,0 +1,87 @@ +import numpy as np +import pandas as pd + +import xarray as xr + + +class Repr: + def setup(self): + a = np.arange(0, 100) + data_vars = dict() + for i in a: + data_vars[f"long_variable_name_{i}"] = xr.DataArray( + name=f"long_variable_name_{i}", + data=np.arange(0, 20), + dims=[f"long_coord_name_{i}_x"], + coords={f"long_coord_name_{i}_x": np.arange(0, 20) * 2}, + ) + self.ds = xr.Dataset(data_vars) + self.ds.attrs = {f"attr_{k}": 2 for k in a} + + def time_repr(self): + repr(self.ds) + + def time_repr_html(self): + self.ds._repr_html_() + + +class ReprDataTree: + def setup(self): + # construct a datatree with 500 nodes + number_of_files = 20 + number_of_groups = 25 + tree_dict = {} + for f in range(number_of_files): + for g in range(number_of_groups): + tree_dict[f"file_{f}/group_{g}"] = xr.Dataset({"g": f * g}) + + self.dt = xr.DataTree.from_dict(tree_dict) + + def time_repr(self): + repr(self.dt) + + def time_repr_html(self): + self.dt._repr_html_() + + +class ReprMultiIndex: + def setup(self): + index = pd.MultiIndex.from_product( + [range(1000), range(1000)], names=("level_0", "level_1") + ) + series = pd.Series(range(1000 * 1000), index=index) + self.da = xr.DataArray(series) + + def time_repr(self): + repr(self.da) + + def time_repr_html(self): + self.da._repr_html_() + + +class ReprPandasRangeIndex: + # display a 
memory-saving pandas.RangeIndex shouldn't trigger memory + # expensive conversion into a numpy array + def setup(self): + index = xr.indexes.PandasIndex(pd.RangeIndex(1_000_000), "x") + self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) + + def time_repr(self): + repr(self.ds.x) + + def time_repr_html(self): + self.ds.x._repr_html_() + + +class ReprXarrayRangeIndex: + # display an Xarray RangeIndex shouldn't trigger memory expensive conversion + # of its lazy coordinate into a numpy array + def setup(self): + index = xr.indexes.RangeIndex.arange(1_000_000, dim="x") + self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) + + def time_repr(self): + repr(self.ds.x) + + def time_repr_html(self): + self.ds.x._repr_html_() diff --git a/benchmarks/benchmarks/rolling.py b/benchmarks/benchmarks/rolling.py new file mode 100644 index 00000000000..4fa2e09c9c0 --- /dev/null +++ b/benchmarks/benchmarks/rolling.py @@ -0,0 +1,142 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . import _skip_slow, parameterized, randn, requires_dask + +nx = 3000 +long_nx = 30000 +ny = 200 +nt = 1000 +window = 20 + +randn_xy = randn((nx, ny), frac_nan=0.1) +randn_xt = randn((nx, nt)) +randn_t = randn((nt,)) +randn_long = randn((long_nx,), frac_nan=0.1) + + +class Rolling: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + self.da_long = xr.DataArray( + randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1} + ) + + @parameterized( + ["func", "center", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling(self, func, center, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + getattr(self.ds.rolling(x=window, center=center), func)().load() + + @parameterized( + ["func", "pandas", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling_long(self, func, pandas, use_bottleneck): + if pandas: + se = self.da_long.to_series() + getattr(se.rolling(window=window, min_periods=window), func)() + else: + with xr.set_options(use_bottleneck=use_bottleneck): + getattr( + self.da_long.rolling(x=window, min_periods=window), func + )().load() + + @parameterized( + ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False]) + ) + def time_rolling_np(self, window_, min_periods, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( + np.nansum + ).load() + + @parameterized( + ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False]) + ) + def time_rolling_construct(self, center, stride, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window, center=center).construct( + "window_dim", stride=stride + ).sum(dim="window_dim").load() + + +class RollingDask(Rolling): + def setup(self, *args, **kwargs): + requires_dask() + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. 
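+ # Re-chunk the same data so the inherited rolling benchmarks time the
+ # dask-backed code path rather than the in-memory numpy/bottleneck one.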
+ _skip_slow() + super().setup(**kwargs) + self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + self.da_long = self.da_long.chunk({"x": 10000}) + + +class RollingMemory: + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + { + "var1": (("x", "y"), randn_xy), + "var2": (("x", "t"), randn_xt), + "var3": (("t",), randn_t), + }, + coords={ + "x": np.arange(nx), + "y": np.linspace(0, 1, ny), + "t": pd.date_range("1970-01-01", periods=nt, freq="D"), + "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + }, + ) + + +class DataArrayRollingMemory(RollingMemory): + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var1.rolling(x=10, y=4) + getattr(roll, func)() + + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var3.rolling(t=100) + getattr(roll, func)() + + @parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.var2.rolling(t=100).construct("w", stride=stride) + self.ds.var3.rolling(t=100).construct("w", stride=stride) + + +class DatasetRollingMemory(RollingMemory): + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(x=10, y=4) + getattr(roll, func)() + + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(t=100) + getattr(roll, func)() + + @parameterized(["stride"], ([None, 5, 50])) + def peakmem_1drolling_construct(self, stride): + self.ds.rolling(t=100).construct("w", stride=stride) diff --git a/benchmarks/benchmarks/unstacking.py b/benchmarks/benchmarks/unstacking.py new file mode 100644 index 00000000000..b3af5eac19c --- /dev/null +++ b/benchmarks/benchmarks/unstacking.py @@ -0,0 +1,64 @@ +import numpy as np +import pandas as pd + +import xarray as xr + +from . 
import requires_dask, requires_sparse + + +class Unstacking: + def setup(self): + data = np.random.default_rng(0).random((250, 500)) + self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) + self.da_missing = self.da_full[:-1] + self.df_missing = self.da_missing.to_pandas() + + def time_unstack_fast(self): + self.da_full.unstack("flat_dim") + + def time_unstack_slow(self): + self.da_missing.unstack("flat_dim") + + def time_unstack_pandas_slow(self): + self.df_missing.unstack() + + +class UnstackingDask(Unstacking): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(**kwargs) + self.da_full = self.da_full.chunk({"flat_dim": 25}) + + +class UnstackingSparse(Unstacking): + def setup(self, *args, **kwargs): + requires_sparse() + + import sparse + + data = sparse.random((500, 1000), random_state=0, fill_value=0) + self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) + self.da_missing = self.da_full[:-1] + + mindex = pd.MultiIndex.from_arrays([np.arange(100), np.arange(100)]) + self.da_eye_2d = xr.DataArray(np.ones((100,)), dims="z", coords={"z": mindex}) + self.da_eye_3d = xr.DataArray( + np.ones((100, 50)), + dims=("z", "foo"), + coords={"z": mindex, "foo": np.arange(50)}, + ) + + def time_unstack_to_sparse_2d(self): + self.da_eye_2d.unstack(sparse=True) + + def time_unstack_to_sparse_3d(self): + self.da_eye_3d.unstack(sparse=True) + + def peakmem_unstack_to_sparse_2d(self): + self.da_eye_2d.unstack(sparse=True) + + def peakmem_unstack_to_sparse_3d(self): + self.da_eye_3d.unstack(sparse=True) + + def time_unstack_pandas_slow(self): + pass diff --git a/benchmarks/benchmarks_iris/__init__.py b/benchmarks/benchmarks_iris/__init__.py new file mode 100644 index 00000000000..e41fe6388d9 --- /dev/null +++ b/benchmarks/benchmarks_iris/__init__.py @@ -0,0 +1,76 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Common code for benchmarks.""" + +from os import environ + +import iris + +from . import generate_data +from .generate_data.um_files import create_um_files + + +def disable_repeat_between_setup(benchmark_object): + """Benchmark where object persistence would be inappropriate (decorator). + + E.g: + + * Benchmarking data realisation + * Benchmarking Cube coord addition + + Can be applied to benchmark classes/methods/functions. + + https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks + + """ + # Prevent repeat runs between setup() runs - object(s) will persist after 1st. + benchmark_object.number = 1 + # Compensate for reduced certainty by increasing number of repeats. + # (setup() is run between each repeat). + # Minimum 5 repeats, run up to 30 repeats / 20 secs whichever comes first. + benchmark_object.repeat = (5, 30, 20.0) + # ASV uses warmup to estimate benchmark time before planning the real run. + # Prevent this, since object(s) will persist after first warmup run, + # which would give ASV misleading info (warmups ignore ``number``). + benchmark_object.warmup_time = 0.0 + + return benchmark_object + + +def on_demand_benchmark(benchmark_object): + """Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set. + + This is a decorator. + + For benchmarks that, for whatever reason, should not be run by default. + E.g: + + * Require a local file + * Used for scalability analysis instead of commit monitoring. + + Can be applied to benchmark classes/methods/functions. 
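+
+ If the environment variable is not set, nothing is returned, so the
+ decorated object becomes ``None`` and ASV never collects it.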
+ + """ + if "ON_DEMAND_BENCHMARKS" in environ: + return benchmark_object + + +@on_demand_benchmark +class ValidateSetup: + """Simple benchmarks that exercise all elements of our setup.""" + + params = [1, 2] + + def setup(self, param): + generate_data.REUSE_DATA = False + (self.file_path,) = create_um_files( + param, param, param, param, False, ["NetCDF"] + ).values() + + def time_validate(self, param): + _ = iris.load(self.file_path) + + def tracemalloc_validate(self, param): + _ = iris.load(self.file_path) diff --git a/benchmarks/benchmarks_iris/aggregate_collapse.py b/benchmarks/benchmarks_iris/aggregate_collapse.py new file mode 100644 index 00000000000..4d5d2923bc1 --- /dev/null +++ b/benchmarks/benchmarks_iris/aggregate_collapse.py @@ -0,0 +1,212 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``.""" + +import warnings + +import numpy as np + +from iris import analysis, coords, cube +from iris.warnings import IrisVagueMetadataWarning + +from .generate_data.stock import realistic_4d_w_everything + + +class AggregationMixin: + params = [[False, True]] + param_names = ["Lazy operations"] + + def setup(self, lazy_run: bool): + warnings.filterwarnings("ignore", message="Ignoring a datum") + warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning) + cube = realistic_4d_w_everything(lazy=lazy_run) + + for cm in cube.cell_measures(): + cube.remove_cell_measure(cm) + for av in cube.ancillary_variables(): + cube.remove_ancillary_variable(av) + + agg_mln_data = np.arange(0, 70, 10) + agg_mln_repeat = np.repeat(agg_mln_data, 10) + + cube = cube[..., :10, :10] + + self.mln_aux = "aggregatable" + self.mln = "model_level_number" + agg_mln_coord = coords.AuxCoord(points=agg_mln_repeat, long_name=self.mln_aux) + + if lazy_run: + agg_mln_coord.points = agg_mln_coord.lazy_points() + cube.add_aux_coord(agg_mln_coord, 1) + self.cube = cube + + +class Aggregation(AggregationMixin): + def time_aggregated_by_MEAN(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.MEAN).data + + def time_aggregated_by_COUNT(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.COUNT, function=lambda values: values > 280 + ).data + + def time_aggregated_by_GMEAN(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.GMEAN).data + + def time_aggregated_by_HMEAN(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.HMEAN).data + + def time_aggregated_by_MAX_RUN(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.MAX_RUN, function=lambda values: values > 280 + ).data + + def time_aggregated_by_MAX(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.MAX).data + + def time_aggregated_by_MEDIAN(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.MEDIAN).data + + def time_aggregated_by_MIN(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.MIN).data + + def time_aggregated_by_PEAK(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.PEAK).data + + def time_aggregated_by_PERCENTILE(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.PERCENTILE, percent=[10, 50, 90] + ).data + + def time_aggregated_by_FAST_PERCENTILE(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, + analysis.PERCENTILE, + mdtol=0, + percent=[10, 50, 90], + fast_percentile_method=True, + ).data + + def 
time_aggregated_by_PROPORTION(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, + analysis.PROPORTION, + function=lambda values: values > 280, + ).data + + def time_aggregated_by_STD_DEV(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.STD_DEV).data + + def time_aggregated_by_VARIANCE(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.VARIANCE).data + + def time_aggregated_by_RMS(self, _): + _ = self.cube.aggregated_by(self.mln_aux, analysis.RMS).data + + def time_collapsed_by_MEAN(self, _): + _ = self.cube.collapsed(self.mln, analysis.MEAN).data + + def time_collapsed_by_COUNT(self, _): + _ = self.cube.collapsed( + self.mln, analysis.COUNT, function=lambda values: values > 280 + ).data + + def time_collapsed_by_GMEAN(self, _): + _ = self.cube.collapsed(self.mln, analysis.GMEAN).data + + def time_collapsed_by_HMEAN(self, _): + _ = self.cube.collapsed(self.mln, analysis.HMEAN).data + + def time_collapsed_by_MAX_RUN(self, _): + _ = self.cube.collapsed( + self.mln, analysis.MAX_RUN, function=lambda values: values > 280 + ).data + + def time_collapsed_by_MAX(self, _): + _ = self.cube.collapsed(self.mln, analysis.MAX).data + + def time_collapsed_by_MEDIAN(self, _): + _ = self.cube.collapsed(self.mln, analysis.MEDIAN).data + + def time_collapsed_by_MIN(self, _): + _ = self.cube.collapsed(self.mln, analysis.MIN).data + + def time_collapsed_by_PEAK(self, _): + _ = self.cube.collapsed(self.mln, analysis.PEAK).data + + def time_collapsed_by_PERCENTILE(self, _): + _ = self.cube.collapsed( + self.mln, analysis.PERCENTILE, percent=[10, 50, 90] + ).data + + def time_collapsed_by_FAST_PERCENTILE(self, _): + _ = self.cube.collapsed( + self.mln, + analysis.PERCENTILE, + mdtol=0, + percent=[10, 50, 90], + fast_percentile_method=True, + ).data + + def time_collapsed_by_PROPORTION(self, _): + _ = self.cube.collapsed( + self.mln, analysis.PROPORTION, function=lambda values: values > 280 + ).data + + def time_collapsed_by_STD_DEV(self, _): + _ = self.cube.collapsed(self.mln, analysis.STD_DEV).data + + def time_collapsed_by_VARIANCE(self, _): + _ = self.cube.collapsed(self.mln, analysis.VARIANCE).data + + def time_collapsed_by_RMS(self, _): + _ = self.cube.collapsed(self.mln, analysis.RMS).data + + +class WeightedAggregation(AggregationMixin): + def setup(self, lazy_run): + super().setup(lazy_run) + + weights = np.linspace(0, 1, 70) + weights = np.broadcast_to(weights, self.cube.shape[:2]) + weights = np.broadcast_to(weights.T, self.cube.shape[::-1]) + weights = weights.T + + self.weights = weights + + ## currently has problems with indexing weights + # def time_w_aggregated_by_WPERCENTILE(self, _): + # _ = self.cube.aggregated_by( + # self.mln_aux, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90] + # ).data + + def time_w_aggregated_by_SUM(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.SUM, weights=self.weights + ).data + + def time_w_aggregated_by_RMS(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.RMS, weights=self.weights + ).data + + def time_w_aggregated_by_MEAN(self, _): + _ = self.cube.aggregated_by( + self.mln_aux, analysis.MEAN, weights=self.weights + ).data + + def time_w_collapsed_by_WPERCENTILE(self, _): + _ = self.cube.collapsed( + self.mln, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90] + ).data + + def time_w_collapsed_by_SUM(self, _): + _ = self.cube.collapsed(self.mln, analysis.SUM, weights=self.weights).data + + def time_w_collapsed_by_RMS(self, _): + _ = self.cube.collapsed(self.mln, 
analysis.RMS, weights=self.weights).data + + def time_w_collapsed_by_MEAN(self, _): + _ = self.cube.collapsed(self.mln, analysis.MEAN, weights=self.weights).data diff --git a/benchmarks/benchmarks_iris/cperf/__init__.py b/benchmarks/benchmarks_iris/cperf/__init__.py new file mode 100644 index 00000000000..05a086bc44b --- /dev/null +++ b/benchmarks/benchmarks_iris/cperf/__init__.py @@ -0,0 +1,92 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project. + +CPerf = comparing performance working with data in UM versus LFRic formats. + +Files available from the UK Met Office: + moo ls moose:/adhoc/projects/avd/asv/data_for_nightly_tests/ +""" + +import numpy as np + +from iris import load_cube + +from ..generate_data import BENCHMARK_DATA +from ..generate_data.ugrid import make_cubesphere_testfile + +# The data of the core test UM files has dtype=np.float32 shape=(1920, 2560) +_UM_DIMS_YX = (1920, 2560) +# The closest cubesphere size in terms of datapoints is sqrt(1920*2560 / 6) +# This gives ~= 905, i.e. "C905" +_N_CUBESPHERE_UM_EQUIVALENT = int(np.sqrt(np.prod(_UM_DIMS_YX) / 6)) + + +class SingleDiagnosticMixin: + """For use in any benchmark classes that work on a single diagnostic file.""" + + params = [ + ["LFRic", "UM", "UM_lbpack0", "UM_netcdf"], + [False, True], + [False, True], + ] + param_names = ["file type", "height dim (len 71)", "time dim (len 3)"] + + def setup(self, file_type, three_d, three_times): + if file_type == "LFRic": + # Generate an appropriate synthetic LFRic file. + if three_times: + n_times = 3 + else: + n_times = 1 + + # Use a cubesphere size ~equivalent to our UM test data. + cells_per_panel_edge = _N_CUBESPHERE_UM_EQUIVALENT + create_kwargs = dict(c_size=cells_per_panel_edge, n_times=n_times) + + if three_d: + create_kwargs["n_levels"] = 71 + + # Will reuse a file if already present. + file_path = make_cubesphere_testfile(**create_kwargs) + + else: + # Locate the appropriate UM file. + if three_times: + # pa/pb003 files + numeric = "003" + else: + # pa/pb000 files + numeric = "000" + + if three_d: + # theta diagnostic, N1280 file w/ 71 levels (1920, 2560, 71) + file_name = f"umglaa_pb{numeric}-theta" + else: + # surface_temp diagnostic, N1280 file (1920, 2560) + file_name = f"umglaa_pa{numeric}-surfacetemp" + + file_suffices = { + "UM": "", # packed FF (WGDOS lbpack = 1) + "UM_lbpack0": ".uncompressed", # unpacked FF (lbpack = 0) + "UM_netcdf": ".nc", # UM file -> Iris -> NetCDF file + } + suffix = file_suffices[file_type] + + file_path = (BENCHMARK_DATA / file_name).with_suffix(suffix) + if not file_path.exists(): + message = "\n".join( + [ + f"Expected local file not found: {file_path}", + "Available from the UK Met Office.", + ] + ) + raise FileNotFoundError(message) + + self.file_path = file_path + self.file_type = file_type + + def load(self): + return load_cube(str(self.file_path)) diff --git a/benchmarks/benchmarks_iris/cperf/equality.py b/benchmarks/benchmarks_iris/cperf/equality.py new file mode 100644 index 00000000000..ffe61ef9387 --- /dev/null +++ b/benchmarks/benchmarks_iris/cperf/equality.py @@ -0,0 +1,55 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Equality benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" + +from .. import on_demand_benchmark +from . import SingleDiagnosticMixin + + +class EqualityMixin(SingleDiagnosticMixin): + r"""Use :class:`SingleDiagnosticMixin` as the realistic case. + + Uses :class:`SingleDiagnosticMixin` as the realistic case will be comparing + :class:`~iris.cube.Cube`\\ s that have been loaded from file. + + """ + + # Cut down the parent parameters. + params = [["LFRic", "UM"]] + + def setup(self, file_type, three_d=False, three_times=False): + super().setup(file_type, three_d, three_times) + self.cube = self.load() + self.other_cube = self.load() + + +@on_demand_benchmark +class CubeEquality(EqualityMixin): + r"""Benchmark time & memory costs of comparing LFRic & UM :class:`~iris.cube.Cube`\\ s.""" + + def _comparison(self): + _ = self.cube == self.other_cube + + def peakmem_eq(self, file_type): + self._comparison() + + def time_eq(self, file_type): + self._comparison() + + +@on_demand_benchmark +class MeshEquality(EqualityMixin): + """Provides extra context for :class:`CubeEquality`.""" + + params = [["LFRic"]] + + def _comparison(self): + _ = self.cube.mesh == self.other_cube.mesh + + def peakmem_eq(self, file_type): + self._comparison() + + def time_eq(self, file_type): + self._comparison() diff --git a/benchmarks/benchmarks_iris/cperf/load.py b/benchmarks/benchmarks_iris/cperf/load.py new file mode 100644 index 00000000000..07c2de9e79f --- /dev/null +++ b/benchmarks/benchmarks_iris/cperf/load.py @@ -0,0 +1,55 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""File loading benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" + +from .. import on_demand_benchmark +from . import SingleDiagnosticMixin + + +@on_demand_benchmark +class SingleDiagnosticLoad(SingleDiagnosticMixin): + def time_load(self, _, __, ___): + """Perform a 'real world comparison'. + + * UM coords are always realised (DimCoords). + * LFRic coords are not realised by default (MeshCoords). + + """ + cube = self.load() + assert cube.has_lazy_data() + # UM files load lon/lat as DimCoords, which are always realised. + expecting_lazy_coords = self.file_type == "LFRic" + for coord_name in "longitude", "latitude": + coord = cube.coord(coord_name) + assert coord.has_lazy_points() == expecting_lazy_coords + assert coord.has_lazy_bounds() == expecting_lazy_coords + + def time_load_w_realised_coords(self, _, __, ___): + """Valuable extra comparison where both UM and LFRic coords are realised.""" + cube = self.load() + for coord_name in "longitude", "latitude": + coord = cube.coord(coord_name) + # Don't touch actual points/bounds objects - permanent + # realisation plays badly with ASV's re-run strategy. + if coord.has_lazy_points(): + coord.core_points().compute() + if coord.has_lazy_bounds(): + coord.core_bounds().compute() + + +@on_demand_benchmark +class SingleDiagnosticRealise(SingleDiagnosticMixin): + # The larger files take a long time to realise. + timeout = 600.0 + + def setup(self, file_type, three_d, three_times): + super().setup(file_type, three_d, three_times) + self.loaded_cube = self.load() + + def time_realise(self, _, __, ___): + # Don't touch loaded_cube.data - permanent realisation plays badly with + # ASV's re-run strategy. 
+ assert self.loaded_cube.has_lazy_data() + self.loaded_cube.core_data().compute() diff --git a/benchmarks/benchmarks_iris/cperf/save.py b/benchmarks/benchmarks_iris/cperf/save.py new file mode 100644 index 00000000000..6dcd0b3bcf9 --- /dev/null +++ b/benchmarks/benchmarks_iris/cperf/save.py @@ -0,0 +1,40 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""File saving benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" + +from iris import save + +from .. import on_demand_benchmark +from ..generate_data.ugrid import make_cube_like_2d_cubesphere, make_cube_like_umfield +from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX + + +@on_demand_benchmark +class NetcdfSave: + """Benchmark time and memory costs of saving ~large-ish data cubes to netcdf. + + Parametrised by file type. + + """ + + params = ["LFRic", "UM"] + param_names = ["data type"] + + def setup(self, data_type): + if data_type == "LFRic": + self.cube = make_cube_like_2d_cubesphere( + n_cube=_N_CUBESPHERE_UM_EQUIVALENT, with_mesh=True + ) + else: + self.cube = make_cube_like_umfield(_UM_DIMS_YX) + + def _save_data(self, cube): + save(cube, "tmp.nc") + + def time_save_data_netcdf(self, data_type): + self._save_data(self.cube) + + def tracemalloc_save_data_netcdf(self, data_type): + self._save_data(self.cube) diff --git a/benchmarks/benchmarks_iris/cube.py b/benchmarks/benchmarks_iris/cube.py new file mode 100644 index 00000000000..0b6829ee2d3 --- /dev/null +++ b/benchmarks/benchmarks_iris/cube.py @@ -0,0 +1,116 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Cube benchmark tests.""" + +from collections.abc import Iterable + +from iris import coords +from iris.cube import Cube + +from .generate_data.stock import realistic_4d_w_everything + + +class CubeCreation: + params = [[False, True], ["instantiate", "construct"]] + param_names = ["Cube has mesh", "Cube creation strategy"] + + cube_kwargs: dict + + def setup(self, w_mesh: bool, _) -> None: + # Loaded as two cubes due to the hybrid height. 
+ source_cube = realistic_4d_w_everything(w_mesh=w_mesh) + + def get_coords_and_dims( + coords_iter: Iterable[coords._DimensionalMetadata], + ) -> list[tuple[coords._DimensionalMetadata, tuple[int, ...]]]: + return [(c, c.cube_dims(source_cube)) for c in coords_iter] + + self.cube_kwargs = dict( + data=source_cube.data, + standard_name=source_cube.standard_name, + long_name=source_cube.long_name, + var_name=source_cube.var_name, + units=source_cube.units, + attributes=source_cube.attributes, + cell_methods=source_cube.cell_methods, + dim_coords_and_dims=get_coords_and_dims(source_cube.dim_coords), + aux_coords_and_dims=get_coords_and_dims(source_cube.aux_coords), + aux_factories=source_cube.aux_factories, + cell_measures_and_dims=get_coords_and_dims(source_cube.cell_measures()), + ancillary_variables_and_dims=get_coords_and_dims( + source_cube.ancillary_variables() + ), + ) + + def time_create(self, _, cube_creation_strategy: str) -> None: + if cube_creation_strategy == "instantiate": + _ = Cube(**self.cube_kwargs) + + elif cube_creation_strategy == "construct": + new_cube = Cube(data=self.cube_kwargs["data"]) + new_cube.standard_name = self.cube_kwargs["standard_name"] + new_cube.long_name = self.cube_kwargs["long_name"] + new_cube.var_name = self.cube_kwargs["var_name"] + new_cube.units = self.cube_kwargs["units"] + new_cube.attributes = self.cube_kwargs["attributes"] + new_cube.cell_methods = self.cube_kwargs["cell_methods"] + for coord, dims in self.cube_kwargs["dim_coords_and_dims"]: + assert isinstance(coord, coords.DimCoord) # Type hint to help linters. + new_cube.add_dim_coord(coord, dims) + for coord, dims in self.cube_kwargs["aux_coords_and_dims"]: + new_cube.add_aux_coord(coord, dims) + for aux_factory in self.cube_kwargs["aux_factories"]: + new_cube.add_aux_factory(aux_factory) + for cell_measure, dims in self.cube_kwargs["cell_measures_and_dims"]: + new_cube.add_cell_measure(cell_measure, dims) + for ancillary_variable, dims in self.cube_kwargs[ + "ancillary_variables_and_dims" + ]: + new_cube.add_ancillary_variable(ancillary_variable, dims) + + else: + message = f"Unknown cube creation strategy: {cube_creation_strategy}" + raise NotImplementedError(message) + + +class CubeEquality: + params = [ + [False, True], + [False, True], + ["metadata_inequality", "coord_inequality", "data_inequality", "all_equal"], + ] + param_names = ["Cubes are lazy", "Cubes have meshes", "Scenario"] + + cube_1: Cube + cube_2: Cube + coord_name = "surface_altitude" + + def setup(self, lazy: bool, w_mesh: bool, scenario: str) -> None: + self.cube_1 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy) + # Using Cube.copy() produces different results due to sharing of the + # Mesh instance. + self.cube_2 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy) + + match scenario: + case "metadata_inequality": + self.cube_2.long_name = "different" + case "coord_inequality": + coord = self.cube_2.coord(self.coord_name) + coord.points = coord.core_points() * 2 + case "data_inequality": + self.cube_2.data = self.cube_2.core_data() * 2 + case "all_equal": + pass + case _: + message = f"Unknown scenario: {scenario}" + raise NotImplementedError(message) + + def time_equality(self, lazy: bool, __, ___) -> None: + _ = self.cube_1 == self.cube_2 + if lazy: + for cube in (self.cube_1, self.cube_2): + # Confirm that this benchmark is safe for repetition. 
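+                # i.e. the comparison must not have realised the lazy
+                # coordinate points or the cube data as a side effect.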
+ assert cube.coord(self.coord_name).has_lazy_points() + assert cube.has_lazy_data() diff --git a/benchmarks/benchmarks_iris/generate_data/__init__.py b/benchmarks/benchmarks_iris/generate_data/__init__.py new file mode 100644 index 00000000000..9a3671389b1 --- /dev/null +++ b/benchmarks/benchmarks_iris/generate_data/__init__.py @@ -0,0 +1,135 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Scripts for generating supporting data for benchmarking. + +Data generated using this repo should use :func:`run_function_elsewhere`, which +means that data is generated using a fixed version of this repo and a fixed +environment, rather than those that get changed when the benchmarking run +checks out a new commit. + +Downstream use of data generated 'elsewhere' requires saving; usually in a +NetCDF file. Could also use pickling but there is a potential risk if the +benchmark sequence runs over two different Python versions. + +""" + +from contextlib import contextmanager +from inspect import getsource +from os import environ +from pathlib import Path +from subprocess import CalledProcessError, check_output, run +from textwrap import dedent +from warnings import warn + +from iris._lazy_data import as_concrete_data +from iris.fileformats import netcdf + +#: Python executable used by :func:`run_function_elsewhere`, set via env +#: variable of same name. Must be path of Python within an environment that +#: includes this repo (including dependencies and test modules) and Mule. +try: + DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"] + _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"]) +except KeyError: + error = "Env variable DATA_GEN_PYTHON not defined." + raise KeyError(error) +except (CalledProcessError, FileNotFoundError, PermissionError): + error = "Env variable DATA_GEN_PYTHON not a runnable python executable path." + raise ValueError(error) + +# The default location of data files used in benchmarks. Used by CI. +default_data_dir = (Path(__file__).parents[2] / ".data").resolve() +# Optionally override the default data location with environment variable. +BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir)) +if BENCHMARK_DATA == default_data_dir: + BENCHMARK_DATA.mkdir(exist_ok=True) + message = ( + f"No BENCHMARK_DATA env var, defaulting to {BENCHMARK_DATA}. " + "Note that some benchmark files are GB in size." + ) + warn(message) +elif not BENCHMARK_DATA.is_dir(): + message = f"Not a directory: {BENCHMARK_DATA} ." + raise ValueError(message) + +# Manual flag to allow the rebuilding of synthetic data. +# False forces a benchmark run to re-make all the data files. +REUSE_DATA = True + + +class DataGenerationError(Exception): + """Exception raised for errors during data generation.""" + + pass + + +def run_function_elsewhere(func_to_run, *args, **kwargs): + """Run a given function using the :const:`DATA_GEN_PYTHON` executable. + + This structure allows the function to be written natively. + + Parameters + ---------- + func_to_run : FunctionType + The function object to be run. + NOTE: the function must be completely self-contained, i.e. perform all + its own imports (within the target :const:`DATA_GEN_PYTHON` + environment). + *args : tuple, optional + Function call arguments. Must all be expressible as simple literals, + i.e. the ``repr`` must be a valid literal expression. + **kwargs: dict, optional + Function call keyword arguments. 
All values must be expressible as + simple literals (see ``*args``). + + Returns + ------- + str + The ``stdout`` from the run. + + """ + func_string = dedent(getsource(func_to_run)) + func_string = func_string.replace("@staticmethod\n", "") + func_call_term_strings = [repr(arg) for arg in args] + func_call_term_strings += [f"{name}={repr(val)}" for name, val in kwargs.items()] + func_call_string = ( + f"{func_to_run.__name__}(" + ",".join(func_call_term_strings) + ")" + ) + python_string = "\n".join([func_string, func_call_string]) + + try: + result = run( + [DATA_GEN_PYTHON, "-c", python_string], + capture_output=True, + check=True, + text=True, + ) + except CalledProcessError as error_: + # From None 'breaks' the error chain - we don't want the original + # traceback since it is long and confusing. + raise DataGenerationError(error_.stderr) from None + + return result.stdout + + +@contextmanager +def load_realised(): + """Force NetCDF loading with realised arrays. + + Since passing between data generation and benchmarking environments is via + file loading, but some benchmarks are only meaningful if starting with real + arrays. + """ + from iris.fileformats._nc_load_rules import helpers + from iris.fileformats.netcdf.loader import _get_cf_var_data as pre_patched + + def patched(*args, **kwargs): + return as_concrete_data(pre_patched(*args, **kwargs)) + + netcdf.loader._get_cf_var_data = patched + helpers._get_cf_var_data = patched + yield + netcdf.loader._get_cf_var_data = pre_patched + helpers._get_cf_var_data = pre_patched diff --git a/benchmarks/benchmarks_iris/generate_data/stock.py b/benchmarks/benchmarks_iris/generate_data/stock.py new file mode 100644 index 00000000000..63970cd344d --- /dev/null +++ b/benchmarks/benchmarks_iris/generate_data/stock.py @@ -0,0 +1,184 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Wrappers for using :mod:`iris.tests.stock` methods for benchmarking. + +See :mod:`benchmarks.generate_data` for an explanation of this structure. +""" + +from contextlib import nullcontext +from hashlib import sha256 +import json +from pathlib import Path + +import iris +from iris import cube +from iris.mesh import load_mesh + +from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere + + +def hash_args(*args, **kwargs): + """Convert arguments into a short hash - for preserving args in filenames.""" + arg_string = str(args) + kwarg_string = json.dumps(kwargs) + full_string = arg_string + kwarg_string + return sha256(full_string.encode()).hexdigest()[:10] + + +def _create_file__xios_common(func_name, **kwargs): + def _external(func_name_, temp_file_dir, **kwargs_): + from iris.tests.stock import netcdf + + func = getattr(netcdf, func_name_) + print(func(temp_file_dir, **kwargs_), end="") + + args_hash = hash_args(**kwargs) + save_path = (BENCHMARK_DATA / f"{func_name}_{args_hash}").with_suffix(".nc") + if not REUSE_DATA or not save_path.is_file(): + # The xios functions take control of save location so need to move to + # a more specific name that allows reuse. 
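+        # run_function_elsewhere returns the child process's stdout; _external
+        # prints the generated file path, so that path is what comes back here.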
+ actual_path = run_function_elsewhere( + _external, + func_name_=func_name, + temp_file_dir=str(BENCHMARK_DATA), + **kwargs, + ) + Path(actual_path).replace(save_path) + return save_path + + +def create_file__xios_2d_face_half_levels( + temp_file_dir, dataset_name, n_faces=866, n_times=1 +): + """Create file wrapper for :meth:`iris.tests.stock.netcdf.create_file__xios_2d_face_half_levels`. + + Have taken control of temp_file_dir + + todo: is create_file__xios_2d_face_half_levels still appropriate now we can + properly save Mesh Cubes? + """ + return _create_file__xios_common( + func_name="create_file__xios_2d_face_half_levels", + dataset_name=dataset_name, + n_faces=n_faces, + n_times=n_times, + ) + + +def create_file__xios_3d_face_half_levels( + temp_file_dir, dataset_name, n_faces=866, n_times=1, n_levels=38 +): + """Create file wrapper for :meth:`iris.tests.stock.netcdf.create_file__xios_3d_face_half_levels`. + + Have taken control of temp_file_dir + + todo: is create_file__xios_3d_face_half_levels still appropriate now we can + properly save Mesh Cubes? + """ + return _create_file__xios_common( + func_name="create_file__xios_3d_face_half_levels", + dataset_name=dataset_name, + n_faces=n_faces, + n_times=n_times, + n_levels=n_levels, + ) + + +def sample_mesh(n_nodes=None, n_faces=None, n_edges=None, lazy_values=False): + """Sample mesh wrapper for :meth:iris.tests.stock.mesh.sample_mesh`.""" + + def _external(*args, **kwargs): + from iris.mesh import save_mesh + from iris.tests.stock.mesh import sample_mesh + + save_path_ = kwargs.pop("save_path") + # Always saving, so laziness is irrelevant. Use lazy to save time. + kwargs["lazy_values"] = True + new_mesh = sample_mesh(*args, **kwargs) + save_mesh(new_mesh, save_path_) + + arg_list = [n_nodes, n_faces, n_edges] + args_hash = hash_args(*arg_list) + save_path = (BENCHMARK_DATA / f"sample_mesh_{args_hash}").with_suffix(".nc") + if not REUSE_DATA or not save_path.is_file(): + _ = run_function_elsewhere(_external, *arg_list, save_path=str(save_path)) + if not lazy_values: + # Realise everything. + with load_realised(): + mesh = load_mesh(str(save_path)) + else: + mesh = load_mesh(str(save_path)) + return mesh + + +def sample_meshcoord(sample_mesh_kwargs=None, location="face", axis="x"): + """Sample meshcoord wrapper for :meth:`iris.tests.stock.mesh.sample_meshcoord`. + + Parameters deviate from the original as cannot pass a + :class:`iris.mesh.Mesh to the separate Python instance - must + instead generate the Mesh as well. + + MeshCoords cannot be saved to file, so the _external method saves the + MeshCoord's Mesh, then the original Python instance loads in that Mesh and + regenerates the MeshCoord from there. + """ + + def _external(sample_mesh_kwargs_, save_path_): + from iris.mesh import save_mesh + from iris.tests.stock.mesh import sample_mesh, sample_meshcoord + + if sample_mesh_kwargs_: + input_mesh = sample_mesh(**sample_mesh_kwargs_) + else: + input_mesh = None + # Don't parse the location or axis arguments - only saving the Mesh at + # this stage. 
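+        # The location/axis arguments are applied later, when the parent
+        # process rebuilds the MeshCoord from the loaded Mesh (see below).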
+ new_meshcoord = sample_meshcoord(mesh=input_mesh) + save_mesh(new_meshcoord.mesh, save_path_) + + args_hash = hash_args(**sample_mesh_kwargs) + save_path = (BENCHMARK_DATA / f"sample_mesh_coord_{args_hash}").with_suffix(".nc") + if not REUSE_DATA or not save_path.is_file(): + _ = run_function_elsewhere( + _external, + sample_mesh_kwargs_=sample_mesh_kwargs, + save_path_=str(save_path), + ) + with load_realised(): + source_mesh = load_mesh(str(save_path)) + # Regenerate MeshCoord from its Mesh, which we saved. + return source_mesh.to_MeshCoord(location=location, axis=axis) + + +def realistic_4d_w_everything(w_mesh=False, lazy=False) -> iris.cube.Cube: + """Run :func:`iris.tests.stock.realistic_4d_w_everything` in ``DATA_GEN_PYTHON``. + + Parameters + ---------- + w_mesh : bool + See :func:`iris.tests.stock.realistic_4d_w_everything` for details. + lazy : bool + If True, the Cube will be returned with all arrays as they would + normally be loaded from file (i.e. most will still be lazy Dask + arrays). If False, all arrays (except derived coordinates) will be + realised NumPy arrays. + + """ + + def _external(w_mesh_: str, save_path_: str): + import iris + from iris.tests.stock import realistic_4d_w_everything + + cube = realistic_4d_w_everything(w_mesh=bool(w_mesh_)) + iris.save(cube, save_path_) + + save_path = (BENCHMARK_DATA / f"realistic_4d_w_everything_{w_mesh}").with_suffix( + ".nc" + ) + if not REUSE_DATA or not save_path.is_file(): + _ = run_function_elsewhere(_external, w_mesh_=w_mesh, save_path_=str(save_path)) + context = nullcontext() if lazy else load_realised() + with context: + return iris.load_cube(save_path, "air_potential_temperature") diff --git a/benchmarks/benchmarks_iris/generate_data/ugrid.py b/benchmarks/benchmarks_iris/generate_data/ugrid.py new file mode 100644 index 00000000000..2cef4752eee --- /dev/null +++ b/benchmarks/benchmarks_iris/generate_data/ugrid.py @@ -0,0 +1,190 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Scripts for generating supporting data for UGRID-related benchmarking.""" + +from iris import load_cube as iris_loadcube + +from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere +from .stock import ( + create_file__xios_2d_face_half_levels, + create_file__xios_3d_face_half_levels, +) + + +def generate_cube_like_2d_cubesphere(n_cube: int, with_mesh: bool, output_path: str): + """Construct and save to file an LFRIc cubesphere-like cube. + + Construct and save to file an LFRIc cubesphere-like cube for a given + cubesphere size, *or* a simpler structured (UM-like) cube of equivalent + size. + + NOTE: this function is *NEVER* called from within this actual package. + Instead, it is to be called via benchmarks.remote_data_generation, + so that it can use up-to-date facilities, independent of the ASV controlled + environment which contains the "Iris commit under test". + + This means: + + * it must be completely self-contained : i.e. it includes all its + own imports, and saves results to an output file. + + """ + from iris import save + from iris.tests.stock.mesh import sample_mesh, sample_mesh_cube + + n_face_nodes = n_cube * n_cube + n_faces = 6 * n_face_nodes + + # Set n_nodes=n_faces and n_edges=2*n_faces + # : Not exact, but similar to a 'real' cubesphere. 
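+    # (An exact C-N cubesphere has 6N^2 faces, 12N^2 edges and 6N^2 + 2 nodes,
+    # i.e. faces : edges : nodes is roughly 1 : 2 : 1.)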
+ n_nodes = n_faces + n_edges = 2 * n_faces + if with_mesh: + mesh = sample_mesh( + n_nodes=n_nodes, n_faces=n_faces, n_edges=n_edges, lazy_values=True + ) + cube = sample_mesh_cube(mesh=mesh, n_z=1) + else: + cube = sample_mesh_cube(nomesh_faces=n_faces, n_z=1) + + # Strip off the 'extra' aux-coord mapping the mesh, which sample-cube adds + # but which we don't want. + cube.remove_coord("mesh_face_aux") + + # Save the result to a named file. + save(cube, output_path) + + +def make_cube_like_2d_cubesphere(n_cube: int, with_mesh: bool): + """Generate an LFRIc cubesphere-like cube. + + Generate an LFRIc cubesphere-like cube for a given cubesphere size, + *or* a simpler structured (UM-like) cube of equivalent size. + + All the cube data, coords and mesh content are LAZY, and produced without + allocating large real arrays (to allow peak-memory testing). + + NOTE: the actual cube generation is done in a stable Iris environment via + benchmarks.remote_data_generation, so it is all channeled via cached netcdf + files in our common testdata directory. + + """ + identifying_filename = f"cube_like_2d_cubesphere_C{n_cube}_Mesh={with_mesh}.nc" + filepath = BENCHMARK_DATA / identifying_filename + if not filepath.exists(): + # Create the required testfile, by running the generation code remotely + # in a 'fixed' python environment. + run_function_elsewhere( + generate_cube_like_2d_cubesphere, + n_cube, + with_mesh=with_mesh, + output_path=str(filepath), + ) + + # File now *should* definitely exist: content is simply the desired cube. + cube = iris_loadcube(str(filepath)) + + # Ensure correct laziness. + _ = cube.data + for coord in cube.coords(mesh_coords=False): + assert not coord.has_lazy_points() + assert not coord.has_lazy_bounds() + if cube.mesh: + for coord in cube.mesh.coords(): + assert coord.has_lazy_points() + for conn in cube.mesh.connectivities(): + assert conn.has_lazy_indices() + + return cube + + +def make_cube_like_umfield(xy_dims): + """Create a "UM-like" cube with lazy content, for save performance testing. + + Roughly equivalent to a single current UM cube, to be compared with + a "make_cube_like_2d_cubesphere(n_cube=_N_CUBESPHERE_UM_EQUIVALENT)" + (see below). + + Note: probably a bit over-simplified, as there is no time coord, but that + is probably equally true of our LFRic-style synthetic data. + + Parameters + ---------- + xy_dims : 2-tuple + Set the horizontal dimensions = n-lats, n-lons. + + """ + + def _external(xy_dims_, save_path_): + from dask import array as da + import numpy as np + + from iris import save + from iris.coords import DimCoord + from iris.cube import Cube + + nz, ny, nx = (1,) + xy_dims_ + + # Base data : Note this is float32 not float64 like LFRic/XIOS outputs. + lazy_data = da.zeros((nz, ny, nx), dtype=np.float32) + cube = Cube(lazy_data, long_name="structured_phenom") + + # Add simple dim coords also. 
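+        # A single unit-less model level plus global latitude/longitude axes
+        # in degrees.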
+ z_dimco = DimCoord(np.arange(nz), long_name="level", units=1) + y_dimco = DimCoord( + np.linspace(-90.0, 90.0, ny), + standard_name="latitude", + units="degrees", + ) + x_dimco = DimCoord( + np.linspace(-180.0, 180.0, nx), + standard_name="longitude", + units="degrees", + ) + for idim, co in enumerate([z_dimco, y_dimco, x_dimco]): + cube.add_dim_coord(co, idim) + + save(cube, save_path_) + + save_path = (BENCHMARK_DATA / f"make_cube_like_umfield_{xy_dims}").with_suffix( + ".nc" + ) + if not REUSE_DATA or not save_path.is_file(): + _ = run_function_elsewhere(_external, xy_dims, str(save_path)) + with load_realised(): + cube = iris_loadcube(str(save_path)) + + return cube + + +def make_cubesphere_testfile(c_size, n_levels=0, n_times=1): + """Build a C cubesphere testfile in a given directory. + + Build a C cubesphere testfile in a given directory, with a standard naming. + If n_levels > 0 specified: 3d file with the specified number of levels. + Return the file path. + + TODO: is create_file__xios... still appropriate now we can properly save Mesh Cubes? + + """ + n_faces = 6 * c_size * c_size + stem_name = f"mesh_cubesphere_C{c_size}_t{n_times}" + kwargs = dict( + temp_file_dir=None, + dataset_name=stem_name, # N.B. function adds the ".nc" extension + n_times=n_times, + n_faces=n_faces, + ) + + three_d = n_levels > 0 + if three_d: + kwargs["n_levels"] = n_levels + kwargs["dataset_name"] += f"_{n_levels}levels" + func = create_file__xios_3d_face_half_levels + else: + func = create_file__xios_2d_face_half_levels + + file_path = func(**kwargs) + return file_path diff --git a/benchmarks/benchmarks_iris/generate_data/um_files.py b/benchmarks/benchmarks_iris/generate_data/um_files.py new file mode 100644 index 00000000000..e2bab6b2748 --- /dev/null +++ b/benchmarks/benchmarks_iris/generate_data/um_files.py @@ -0,0 +1,198 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Generate FF, PP and NetCDF files based on a minimal synthetic FF file. + +NOTE: uses the Mule package, so depends on an environment with Mule installed. +""" + + +def _create_um_files( + len_x: int, len_y: int, len_z: int, len_t: int, compress, save_paths: dict +) -> None: + """Generate an FF object of given shape and compression, save to FF/PP/NetCDF. + + This is run externally + (:func:`benchmarks.generate_data.run_function_elsewhere`), so all imports + are self-contained and input parameters are simple types. 
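+
+    ``save_paths`` maps file-type names to output paths; the keys used here
+    are drawn from ``{"FF", "PP", "NetCDF"}`` (see :func:`create_um_files`).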
+ """ + from copy import deepcopy + from datetime import datetime + from tempfile import NamedTemporaryFile + + from mule import ArrayDataProvider, Field3, FieldsFile + import mule.ff + from mule.pp import fields_to_pp_file + import numpy as np + + from iris import load_cube + from iris import save as save_cube + + def to_bytes_patch(self, field): + data = field.get_data() + dtype = mule.ff._DATA_DTYPES[self.WORD_SIZE][field.lbuser1] + data = data.astype(dtype) + return data.tobytes(), data.size + + # TODO: remove this patch when fixed in mule, see https://github.com/MetOffice/simulation-systems/discussions/389 + mule.ff._WriteFFOperatorUnpacked.to_bytes = to_bytes_patch + + template = { + "fixed_length_header": {"dataset_type": 3, "grid_staggering": 3}, + "integer_constants": { + "num_p_levels": len_z, + "num_cols": len_x, + "num_rows": len_y, + }, + "real_constants": {}, + "level_dependent_constants": {"dims": (len_z + 1, None)}, + } + new_ff = FieldsFile.from_template(deepcopy(template)) + + data_array = np.arange(len_x * len_y).reshape(len_x, len_y) + array_provider = ArrayDataProvider(data_array) + + def add_field(level_: int, time_step_: int) -> None: + """Add a minimal field to the new :class:`~mule.FieldsFile`. + + Includes the minimum information to allow Mule saving and Iris + loading, as well as incrementation for vertical levels and time + steps to allow generation of z and t dimensions. + """ + new_field = Field3.empty() + # To correspond to the header-release 3 class used. + new_field.lbrel = 3 + # Mule uses the first element of the lookup to test for + # unpopulated fields (and skips them), so the first element should + # be set to something. The year will do. + new_field.raw[1] = datetime.now().year + + # Horizontal. + new_field.lbcode = 1 + new_field.lbnpt = len_x + new_field.lbrow = len_y + new_field.bdx = new_ff.real_constants.col_spacing + new_field.bdy = new_ff.real_constants.row_spacing + new_field.bzx = new_ff.real_constants.start_lon - 0.5 * new_field.bdx + new_field.bzy = new_ff.real_constants.start_lat - 0.5 * new_field.bdy + + # Hemisphere. + new_field.lbhem = 32 + # Processing. + new_field.lbproc = 0 + + # Vertical. + # Hybrid height values by simulating sequences similar to those in a + # theta file. + new_field.lbvc = 65 + if level_ == 0: + new_field.lblev = 9999 + else: + new_field.lblev = level_ + + level_1 = level_ + 1 + six_rec = 20 / 3 + three_rec = six_rec / 2 + + new_field.blev = level_1**2 * six_rec - six_rec + new_field.brsvd1 = level_1**2 * six_rec + (six_rec * level_1) - three_rec + + brsvd2_simulated = np.linspace(0.995, 0, len_z) + shift = min(len_z, 2) + bhrlev_simulated = np.concatenate([np.ones(shift), brsvd2_simulated[:-shift]]) + new_field.brsvd2 = brsvd2_simulated[level_] + new_field.bhrlev = bhrlev_simulated[level_] + + # Time. + new_field.lbtim = 11 + + new_field.lbyr = time_step_ + for attr_name in ["lbmon", "lbdat", "lbhr", "lbmin", "lbsec"]: + setattr(new_field, attr_name, 0) + + new_field.lbyrd = time_step_ + 1 + for attr_name in ["lbmond", "lbdatd", "lbhrd", "lbmind", "lbsecd"]: + setattr(new_field, attr_name, 0) + + # Data and packing. 
+ new_field.lbuser1 = 1 + new_field.lbpack = int(compress) + new_field.bacc = 0 + new_field.bmdi = -1 + new_field.lbext = 0 + new_field.set_data_provider(array_provider) + + new_ff.fields.append(new_field) + + for time_step in range(len_t): + for level in range(len_z): + add_field(level, time_step + 1) + + ff_path = save_paths.get("FF", None) + pp_path = save_paths.get("PP", None) + nc_path = save_paths.get("NetCDF", None) + + if ff_path: + new_ff.to_file(ff_path) + if pp_path: + fields_to_pp_file(str(pp_path), new_ff.fields) + if nc_path: + temp_ff_path = None + # Need an Iris Cube from the FF content. + if ff_path: + # Use the existing file. + ff_cube = load_cube(ff_path) + else: + # Make a temporary file. + temp_ff_path = NamedTemporaryFile() + new_ff.to_file(temp_ff_path.name) + ff_cube = load_cube(temp_ff_path.name) + + save_cube(ff_cube, nc_path, zlib=compress) + if temp_ff_path: + temp_ff_path.close() + + +FILE_EXTENSIONS = {"FF": "", "PP": ".pp", "NetCDF": ".nc"} + + +def create_um_files( + len_x: int, + len_y: int, + len_z: int, + len_t: int, + compress: bool, + file_types: list, +) -> dict: + """Generate FF-based FF / PP / NetCDF files with specified shape and compression. + + All files representing a given shape are saved in a dedicated directory. A + dictionary of the saved paths is returned. + + If the required files exist, they are re-used, unless + :const:`benchmarks.REUSE_DATA` is ``False``. + """ + # Self contained imports to avoid linting confusion with _create_um_files(). + from . import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere + + save_name_sections = ["UM", len_x, len_y, len_z, len_t] + save_name = "_".join(str(section) for section in save_name_sections) + save_dir = BENCHMARK_DATA / save_name + if not save_dir.is_dir(): + save_dir.mkdir(parents=True) + + save_paths = {} + files_exist = True + for file_type in file_types: + file_ext = FILE_EXTENSIONS[file_type] + save_path = (save_dir / f"{compress}").with_suffix(file_ext) + files_exist = files_exist and save_path.is_file() + save_paths[file_type] = str(save_path) + + if not REUSE_DATA or not files_exist: + _ = run_function_elsewhere( + _create_um_files, len_x, len_y, len_z, len_t, compress, save_paths + ) + + return save_paths diff --git a/benchmarks/benchmarks_iris/import_iris.py b/benchmarks/benchmarks_iris/import_iris.py new file mode 100644 index 00000000000..ff5f19e4211 --- /dev/null +++ b/benchmarks/benchmarks_iris/import_iris.py @@ -0,0 +1,278 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. + +"""Import iris benchmarking.""" + +from importlib import import_module, reload + +################ +# Prepare info for reset_colormaps: + +# Import and capture colormaps. +from matplotlib import colormaps # isort:skip + +_COLORMAPS_ORIG = set(colormaps) + +# Import iris.palette, which modifies colormaps. +import iris.palette + +# Derive which colormaps have been added by iris.palette. +_COLORMAPS_MOD = set(colormaps) +COLORMAPS_EXTRA = _COLORMAPS_MOD - _COLORMAPS_ORIG + +# Touch iris.palette to prevent linters complaining. +_ = iris.palette + +################ + + +class Iris: + @staticmethod + def _import(module_name, reset_colormaps=False): + """Have experimented with adding sleep() commands into the imported modules. 
+ + The results reveal: + + ASV avoids invoking `import x` if nothing gets called in the + benchmark (some imports were timed, but only those where calls + happened during import). + + Using reload() is not identical to importing, but does produce + results that are very close to expected import times, so this is fine + for monitoring for regressions. + It is also ideal for accurate repetitions, without the need to mess + with the ASV `number` attribute etc, since cached imports are not used + and the repetitions are therefore no faster than the first run. + """ + mod = import_module(module_name) + + if reset_colormaps: + # Needed because reload() will attempt to register new colormaps a + # second time, which errors by default. + for cm_name in COLORMAPS_EXTRA: + colormaps.unregister(cm_name) + + reload(mod) + + def time_iris(self): + self._import("iris") + + def time__concatenate(self): + self._import("iris._concatenate") + + def time__constraints(self): + self._import("iris._constraints") + + def time__data_manager(self): + self._import("iris._data_manager") + + def time__deprecation(self): + self._import("iris._deprecation") + + def time__lazy_data(self): + self._import("iris._lazy_data") + + def time__merge(self): + self._import("iris._merge") + + def time__representation(self): + self._import("iris._representation") + + def time_analysis(self): + self._import("iris.analysis") + + def time_analysis__area_weighted(self): + self._import("iris.analysis._area_weighted") + + def time_analysis__grid_angles(self): + self._import("iris.analysis._grid_angles") + + def time_analysis__interpolation(self): + self._import("iris.analysis._interpolation") + + def time_analysis__regrid(self): + self._import("iris.analysis._regrid") + + def time_analysis__scipy_interpolate(self): + self._import("iris.analysis._scipy_interpolate") + + def time_analysis_calculus(self): + self._import("iris.analysis.calculus") + + def time_analysis_cartography(self): + self._import("iris.analysis.cartography") + + def time_analysis_geomerty(self): + self._import("iris.analysis.geometry") + + def time_analysis_maths(self): + self._import("iris.analysis.maths") + + def time_analysis_stats(self): + self._import("iris.analysis.stats") + + def time_analysis_trajectory(self): + self._import("iris.analysis.trajectory") + + def time_aux_factory(self): + self._import("iris.aux_factory") + + def time_common(self): + self._import("iris.common") + + def time_common_lenient(self): + self._import("iris.common.lenient") + + def time_common_metadata(self): + self._import("iris.common.metadata") + + def time_common_mixin(self): + self._import("iris.common.mixin") + + def time_common_resolve(self): + self._import("iris.common.resolve") + + def time_config(self): + self._import("iris.config") + + def time_coord_categorisation(self): + self._import("iris.coord_categorisation") + + def time_coord_systems(self): + self._import("iris.coord_systems") + + def time_coords(self): + self._import("iris.coords") + + def time_cube(self): + self._import("iris.cube") + + def time_exceptions(self): + self._import("iris.exceptions") + + def time_experimental(self): + self._import("iris.experimental") + + def time_fileformats(self): + self._import("iris.fileformats") + + def time_fileformats__ff(self): + self._import("iris.fileformats._ff") + + def time_fileformats__ff_cross_references(self): + self._import("iris.fileformats._ff_cross_references") + + def time_fileformats__pp_lbproc_pairs(self): + self._import("iris.fileformats._pp_lbproc_pairs") + + def 
time_fileformats_structured_array_identification(self): + self._import("iris.fileformats._structured_array_identification") + + def time_fileformats_abf(self): + self._import("iris.fileformats.abf") + + def time_fileformats_cf(self): + self._import("iris.fileformats.cf") + + def time_fileformats_dot(self): + self._import("iris.fileformats.dot") + + def time_fileformats_name(self): + self._import("iris.fileformats.name") + + def time_fileformats_name_loaders(self): + self._import("iris.fileformats.name_loaders") + + def time_fileformats_netcdf(self): + self._import("iris.fileformats.netcdf") + + def time_fileformats_nimrod(self): + self._import("iris.fileformats.nimrod") + + def time_fileformats_nimrod_load_rules(self): + self._import("iris.fileformats.nimrod_load_rules") + + def time_fileformats_pp(self): + self._import("iris.fileformats.pp") + + def time_fileformats_pp_load_rules(self): + self._import("iris.fileformats.pp_load_rules") + + def time_fileformats_pp_save_rules(self): + self._import("iris.fileformats.pp_save_rules") + + def time_fileformats_rules(self): + self._import("iris.fileformats.rules") + + def time_fileformats_um(self): + self._import("iris.fileformats.um") + + def time_fileformats_um__fast_load(self): + self._import("iris.fileformats.um._fast_load") + + def time_fileformats_um__fast_load_structured_fields(self): + self._import("iris.fileformats.um._fast_load_structured_fields") + + def time_fileformats_um__ff_replacement(self): + self._import("iris.fileformats.um._ff_replacement") + + def time_fileformats_um__optimal_array_structuring(self): + self._import("iris.fileformats.um._optimal_array_structuring") + + def time_fileformats_um_cf_map(self): + self._import("iris.fileformats.um_cf_map") + + def time_io(self): + self._import("iris.io") + + def time_io_format_picker(self): + self._import("iris.io.format_picker") + + def time_iterate(self): + self._import("iris.iterate") + + def time_palette(self): + self._import("iris.palette", reset_colormaps=True) + + def time_plot(self): + self._import("iris.plot") + + def time_quickplot(self): + self._import("iris.quickplot") + + def time_std_names(self): + self._import("iris.std_names") + + def time_symbols(self): + self._import("iris.symbols") + + def time_tests(self): + self._import("iris.tests") + + def time_time(self): + self._import("iris.time") + + def time_util(self): + self._import("iris.util") + + # third-party imports + + def time_third_party_cartopy(self): + self._import("cartopy") + + def time_third_party_cf_units(self): + self._import("cf_units") + + def time_third_party_cftime(self): + self._import("cftime") + + def time_third_party_matplotlib(self): + self._import("matplotlib") + + def time_third_party_numpy(self): + self._import("numpy") + + def time_third_party_scipy(self): + self._import("scipy") diff --git a/benchmarks/benchmarks_iris/iterate.py b/benchmarks/benchmarks_iris/iterate.py new file mode 100644 index 00000000000..664bcf8ba2c --- /dev/null +++ b/benchmarks/benchmarks_iris/iterate.py @@ -0,0 +1,26 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Iterate benchmark tests.""" + +import numpy as np + +from iris import coords, cube, iterate + + +class IZip: + def setup(self): + data_2d = np.zeros((1000,) * 2) + data_1d = data_2d[0] + local_cube = cube.Cube(data_2d) + coord_a = coords.AuxCoord(points=data_1d, long_name="a") + coord_b = coords.AuxCoord(points=data_1d, long_name="b") + self.coord_names = (coord.long_name for coord in (coord_a, coord_b)) + + local_cube.add_aux_coord(coord_a, 0) + local_cube.add_aux_coord(coord_b, 1) + self.cube = local_cube + + def time_izip(self): + iterate.izip(self.cube, coords=self.coord_names) diff --git a/benchmarks/benchmarks_iris/load/__init__.py b/benchmarks/benchmarks_iris/load/__init__.py new file mode 100644 index 00000000000..5c5a62a5151 --- /dev/null +++ b/benchmarks/benchmarks_iris/load/__init__.py @@ -0,0 +1,221 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""File loading benchmark tests.""" + +from iris import AttributeConstraint, Constraint, load, load_cube +from iris.cube import Cube +from iris.fileformats.um import structured_um_loading + +from ..generate_data import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere +from ..generate_data.um_files import create_um_files + + +class LoadAndRealise: + # For data generation + timeout = 600.0 + params = ( + [(50, 50, 2), (1280, 960, 5), (2, 2, 1000)], + [False, True], + ["FF", "PP", "NetCDF"], + ) + param_names = ["xyz", "compressed", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[2] + file_path_dict: dict[tuple[int, int, int], dict[bool, dict[str, str]]] = {} + for xyz in self.params[0]: + file_path_dict[xyz] = {} + x, y, z = xyz + for compress in self.params[1]: + file_path_dict[xyz][compress] = create_um_files( + x, y, z, 1, compress, file_type_args + ) + return file_path_dict + + def setup( + self, + file_path_dict: dict, + xyz: tuple, + compress: bool, + file_format: str, + ) -> None: + self.file_path = file_path_dict[xyz][compress][file_format] + self.cube = self.load() + + def load(self) -> Cube: + return load_cube(self.file_path) + + def time_load(self, _, __, ___, ____) -> None: + _ = self.load() + + def time_realise(self, _, __, ___, ____) -> None: + # Don't touch cube.data - permanent realisation plays badly with ASV's + # re-run strategy. + assert self.cube.has_lazy_data() + self.cube.core_data().compute() + + +class STASHConstraint: + # xyz sizes mimic LoadAndRealise to maximise file reuse. 
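+    # Only FF and PP are parametrised here - STASH is a UM lookup-header
+    # attribute.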
+ params = ([(2, 2, 2), (1280, 960, 5), (2, 2, 1000)], ["FF", "PP"]) + param_names = ["xyz", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[1] + file_path_dict = {} + for xyz in self.params[0]: + x, y, z = xyz + file_path_dict[xyz] = create_um_files(x, y, z, 1, False, file_type_args) + return file_path_dict + + def setup(self, file_path_dict: dict, xyz: tuple, file_format: str) -> None: + self.file_path = file_path_dict[xyz][file_format] + + def time_stash_constraint(self, _, __, ___) -> None: + _ = load_cube(self.file_path, AttributeConstraint(STASH="m??s??i901")) + + +class TimeConstraint: + params = ([3, 20], ["FF", "PP", "NetCDF"]) + param_names = ["time_dim_len", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[1] + file_path_dict = {} + for time_dim_len in self.params[0]: + file_path_dict[time_dim_len] = create_um_files( + 20, 20, 5, time_dim_len, False, file_type_args + ) + return file_path_dict + + def setup(self, file_path_dict: dict, time_dim_len: int, file_format: str) -> None: + self.file_path = file_path_dict[time_dim_len][file_format] + self.time_constr = Constraint(time=lambda cell: cell.point.year < 3) + + def time_time_constraint(self, _, __, ___) -> None: + _ = load_cube(self.file_path, self.time_constr) + + +class ManyVars: + FILE_PATH = BENCHMARK_DATA / "many_var_file.nc" + + @staticmethod + def _create_file(save_path: str) -> None: + """Run externally - everything must be self-contained.""" + import numpy as np + + from iris import save + from iris.coords import AuxCoord + from iris.cube import Cube + + data_len = 8 + data = np.arange(data_len) + cube = Cube(data, units="unknown") + extra_vars = 80 + names = ["coord_" + str(i) for i in range(extra_vars)] + for name in names: + coord = AuxCoord(data, long_name=name, units="unknown") + cube.add_aux_coord(coord, 0) + save(cube, save_path) + + def setup_cache(self) -> None: + if not REUSE_DATA or not self.FILE_PATH.is_file(): + # See :mod:`benchmarks.generate_data` docstring for full explanation. + _ = run_function_elsewhere( + self._create_file, + str(self.FILE_PATH), + ) + + def time_many_var_load(self) -> None: + _ = load(str(self.FILE_PATH)) + + +class ManyCubes: + FILE_PATH = BENCHMARK_DATA / "many_cube_file.nc" + + @staticmethod + def _create_file(save_path: str) -> None: + """Run externally - everything must be self-contained.""" + import numpy as np + + from iris import save + from iris.coords import AuxCoord, DimCoord + from iris.cube import Cube, CubeList + + data_len = 81920 + bnds_len = 3 + data = np.arange(data_len).astype(np.float32) + bnds_data = ( + np.arange(data_len * bnds_len) + .astype(np.float32) + .reshape(data_len, bnds_len) + ) + time = DimCoord(np.array([0]), standard_name="time") + lat = AuxCoord( + data, bounds=bnds_data, standard_name="latitude", units="degrees" + ) + lon = AuxCoord( + data, bounds=bnds_data, standard_name="longitude", units="degrees" + ) + cube = Cube(data.reshape(1, -1), units="unknown") + cube.add_dim_coord(time, 0) + cube.add_aux_coord(lat, 1) + cube.add_aux_coord(lon, 1) + + n_cubes = 100 + cubes = CubeList() + for i in range(n_cubes): + cube = cube.copy() + cube.long_name = f"var_{i}" + cubes.append(cube) + save(cubes, save_path) + + def setup_cache(self) -> None: + if not REUSE_DATA or not self.FILE_PATH.is_file(): + # See :mod:`benchmarks.generate_data` docstring for full explanation. 
+ _ = run_function_elsewhere( + self._create_file, + str(self.FILE_PATH), + ) + + def time_many_cube_load(self) -> None: + _ = load(str(self.FILE_PATH)) + + +class StructuredFF: + """Test structured loading of a large-ish fieldsfile. + + Structured load of the larger size should show benefit over standard load, + avoiding the cost of merging. + """ + + params = ([(2, 2, 2), (1280, 960, 5), (2, 2, 1000)], [False, True]) + param_names = ["xyz", "structured_loading"] + + def setup_cache(self) -> dict: + file_path_dict = {} + for xyz in self.params[0]: + x, y, z = xyz + file_path_dict[xyz] = create_um_files(x, y, z, 1, False, ["FF"]) + return file_path_dict + + def setup(self, file_path_dict, xyz, structured_load): + self.file_path = file_path_dict[xyz]["FF"] + self.structured_load = structured_load + + def load(self): + """Load the whole file (in fact there is only 1 cube).""" + + def _load(): + _ = load(self.file_path) + + if self.structured_load: + with structured_um_loading(): + _load() + else: + _load() + + def time_structured_load(self, _, __, ___): + self.load() diff --git a/benchmarks/benchmarks_iris/load/ugrid.py b/benchmarks/benchmarks_iris/load/ugrid.py new file mode 100644 index 00000000000..5ad0086ef36 --- /dev/null +++ b/benchmarks/benchmarks_iris/load/ugrid.py @@ -0,0 +1,115 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Mesh data loading benchmark tests.""" + +from iris import load_cube as iris_load_cube +from iris.mesh import load_mesh as iris_load_mesh + +from ..generate_data.stock import create_file__xios_2d_face_half_levels + + +def synthetic_data(**kwargs): + # Ensure all uses of the synthetic data function use the common directory. + # File location is controlled by :mod:`generate_data`, hence temp_file_dir=None. + return create_file__xios_2d_face_half_levels(temp_file_dir=None, **kwargs) + + +def load_cube(*args, **kwargs): + return iris_load_cube(*args, **kwargs) + + +def load_mesh(*args, **kwargs): + return iris_load_mesh(*args, **kwargs) + + +class BasicLoading: + params = [1, int(2e5)] + param_names = ["number of faces"] + + def setup_common(self, **kwargs): + self.data_path = synthetic_data(**kwargs) + + def setup(self, *args): + self.setup_common(dataset_name="Loading", n_faces=args[0]) + + def time_load_file(self, *args): + _ = load_cube(str(self.data_path)) + + def time_load_mesh(self, *args): + _ = load_mesh(str(self.data_path)) + + +class BasicLoadingTime(BasicLoading): + """Same as BasicLoading, but scaling over a time series - an unlimited dimension.""" + + # NOTE iris#4834 - careful how big the time dimension is (time dimension + # is UNLIMITED). + + param_names = ["number of time steps"] + + def setup(self, *args): + self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0]) + + +class DataRealisation: + # Prevent repeat runs between setup() runs - data won't be lazy after 1st. + number = 1 + # Compensate for reduced certainty by increasing number of repeats. + repeat = (10, 10, 10.0) + # Prevent ASV running its warmup, which ignores `number` and would + # therefore get a false idea of typical run time since the data would stop + # being lazy. 
+ warmup_time = 0.0 + timeout = 300.0 + + params = [int(1e4), int(2e5)] + param_names = ["number of faces"] + + def setup_common(self, **kwargs): + data_path = synthetic_data(**kwargs) + self.cube = load_cube(str(data_path)) + + def setup(self, *args): + self.setup_common(dataset_name="Realisation", n_faces=args[0]) + + def time_realise_data(self, *args): + assert self.cube.has_lazy_data() + _ = self.cube.data[0] + + +class DataRealisationTime(DataRealisation): + """Same as DataRealisation, but scaling over a time series - an unlimited dimension.""" + + param_names = ["number of time steps"] + + def setup(self, *args): + self.setup_common(dataset_name="Realisation", n_faces=1, n_times=args[0]) + + +class Callback: + params = [1, int(2e5)] + param_names = ["number of faces"] + + def setup_common(self, **kwargs): + def callback(cube, field, filename): + return cube[::2] + + self.data_path = synthetic_data(**kwargs) + self.callback = callback + + def setup(self, *args): + self.setup_common(dataset_name="Loading", n_faces=args[0]) + + def time_load_file_callback(self, *args): + _ = load_cube(str(self.data_path), callback=self.callback) + + +class CallbackTime(Callback): + """Same as Callback, but scaling over a time series - an unlimited dimension.""" + + param_names = ["number of time steps"] + + def setup(self, *args): + self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0]) diff --git a/benchmarks/benchmarks_iris/merge_concat.py b/benchmarks/benchmarks_iris/merge_concat.py new file mode 100644 index 00000000000..2d3738683ad --- /dev/null +++ b/benchmarks/benchmarks_iris/merge_concat.py @@ -0,0 +1,72 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``.""" + +import warnings + +import numpy as np + +from iris.cube import CubeList +from iris.warnings import IrisVagueMetadataWarning + +from .generate_data.stock import realistic_4d_w_everything + + +class Merge: + # TODO: Improve coverage. + + cube_list: CubeList + + def setup(self): + source_cube = realistic_4d_w_everything() + + # Merge does not yet fully support cell measures and ancillary variables. + for cm in source_cube.cell_measures(): + source_cube.remove_cell_measure(cm) + for av in source_cube.ancillary_variables(): + source_cube.remove_ancillary_variable(av) + + second_cube = source_cube.copy() + scalar_coord = second_cube.coords(dimensions=[])[0] + scalar_coord.points = scalar_coord.points + 1 + self.cube_list = CubeList([source_cube, second_cube]) + + def time_merge(self): + _ = self.cube_list.merge_cube() + + def tracemalloc_merge(self): + _ = self.cube_list.merge_cube() + + tracemalloc_merge.number = 3 # type: ignore[attr-defined] + + +class Concatenate: + # TODO: Improve coverage. 
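+    # Concatenation is exercised along the leading dimension: setup() offsets
+    # the first dim coord of each copy so the 25 cubes join end-to-end.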
+ + cube_list: CubeList + + params = [[False, True]] + param_names = ["Lazy operations"] + + def setup(self, lazy_run: bool): + warnings.filterwarnings("ignore", message="Ignoring a datum") + warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning) + source_cube = realistic_4d_w_everything(lazy=lazy_run) + self.cube_list = CubeList([source_cube]) + for _ in range(24): + next_cube = self.cube_list[-1].copy() + first_dim_coord = next_cube.coord(dimensions=0, dim_coords=True) + first_dim_coord.points = ( + first_dim_coord.points + np.ptp(first_dim_coord.points) + 1 + ) + self.cube_list.append(next_cube) + + def time_concatenate(self, _): + _ = self.cube_list.concatenate_cube() + + def tracemalloc_concatenate(self, _): + _ = self.cube_list.concatenate_cube() + + tracemalloc_concatenate.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/mesh/__init__.py b/benchmarks/benchmarks_iris/mesh/__init__.py new file mode 100644 index 00000000000..9cc76ce0aa4 --- /dev/null +++ b/benchmarks/benchmarks_iris/mesh/__init__.py @@ -0,0 +1,5 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmark tests for the iris.mesh module.""" diff --git a/benchmarks/benchmarks_iris/mesh/utils/__init__.py b/benchmarks/benchmarks_iris/mesh/utils/__init__.py new file mode 100644 index 00000000000..e20973c0a78 --- /dev/null +++ b/benchmarks/benchmarks_iris/mesh/utils/__init__.py @@ -0,0 +1,5 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmark tests for the iris.mesh.utils module.""" diff --git a/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py b/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py new file mode 100644 index 00000000000..a61deea56d3 --- /dev/null +++ b/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py @@ -0,0 +1,227 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmarks stages of operation. + +Benchmarks stages of operation of the function +:func:`iris.mesh.utils.recombine_submeshes`. + +""" + +import os + +import dask.array as da +import numpy as np + +from iris import load, load_cube, save +from iris.mesh.utils import recombine_submeshes + +from ...generate_data.ugrid import make_cube_like_2d_cubesphere + + +class MixinCombineRegions: + # Characterise time taken + memory-allocated, for various stages of combine + # operations on cubesphere-like test data. + params = [50, 500] + param_names = ["cubesphere-N"] + + def _parametrised_cache_filename(self, n_cubesphere, content_name): + return f"cube_C{n_cubesphere}_{content_name}.nc" + + def _make_region_cubes(self, full_mesh_cube): + """Make a fixed number of region cubes from a full meshcube.""" + # Divide the cube into regions. + n_faces = full_mesh_cube.shape[-1] + # Start with a simple list of face indices + # first extend to multiple of 5 + n_faces_5s = 5 * ((n_faces + 1) // 5) + i_faces = np.arange(n_faces_5s, dtype=int) + # reshape (5N,) to (N, 5) + i_faces = i_faces.reshape((n_faces_5s // 5, 5)) + # reorder [2, 3, 4, 0, 1] within each block of 5 + i_faces = np.concatenate([i_faces[:, 2:], i_faces[:, :2]], axis=1) + # flatten to get [2 3 4 0 1 (-) 8 9 10 6 7 (-) 13 14 15 11 12 ...] 
+ i_faces = i_faces.flatten() + # reduce back to original length, wrap any overflows into valid range + i_faces = i_faces[:n_faces] % n_faces + + # Divide into regions -- always slightly uneven, since 7 doesn't divide + n_regions = 7 + n_facesperregion = n_faces // n_regions + i_face_regions = (i_faces // n_facesperregion) % n_regions + region_inds = [ + np.where(i_face_regions == i_region)[0] for i_region in range(n_regions) + ] + # NOTE: this produces 7 regions, with near-adjacent value ranges but + # with some points "moved" to an adjacent region. + # Also, region-0 is bigger (because of not dividing by 7). + + # Finally, make region cubes with these indices. + region_cubes = [full_mesh_cube[..., inds] for inds in region_inds] + return region_cubes + + def setup_cache(self): + """Cache all the necessary source data on disk.""" + # Control dask, to minimise memory usage + allow largest data. + self.fix_dask_settings() + + for n_cubesphere in self.params: + # Do for each parameter, since "setup_cache" is NOT parametrised + mesh_cube = make_cube_like_2d_cubesphere( + n_cube=n_cubesphere, with_mesh=True + ) + # Save to files which include the parameter in the names. + save( + mesh_cube, + self._parametrised_cache_filename(n_cubesphere, "meshcube"), + ) + region_cubes = self._make_region_cubes(mesh_cube) + save( + region_cubes, + self._parametrised_cache_filename(n_cubesphere, "regioncubes"), + ) + + def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=True): + """Combine tests "standard" setup operation. + + Load the source cubes (full-mesh + region) from disk. + These are specific to the cubesize parameter. + The data is cached on disk rather than calculated, to avoid any + pre-loading of the process memory allocation. + + If 'imaginary_data' is set (default), the region cubes data is replaced + with lazy data in the form of a da.zeros(). Otherwise, the region data + is lazy data from the files. + + If 'create_result_cube' is set, create "self.combined_cube" containing + the (still lazy) result. + + NOTE: various test classes override + extend this. + + """ + # Load source cubes (full-mesh and regions) + self.full_mesh_cube = load_cube( + self._parametrised_cache_filename(n_cubesphere, "meshcube") + ) + self.region_cubes = load( + self._parametrised_cache_filename(n_cubesphere, "regioncubes") + ) + + # Remove all var-names from loaded cubes, which can otherwise cause + # problems. Also implement 'imaginary' data. + for cube in self.region_cubes + [self.full_mesh_cube]: + cube.var_name = None + for coord in cube.coords(): + coord.var_name = None + if imaginary_data: + # Replace cube data (lazy file data) with 'imaginary' data. + # This has the same lazy-array attributes, but is allocated by + # creating chunks on demand instead of loading from file. + data = cube.lazy_data() + data = da.zeros(data.shape, dtype=data.dtype, chunks=data.chunksize) + cube.data = data + + if create_result_cube: + self.recombined_cube = self.recombine() + + # Fix dask usage mode for all the subsequent performance tests. + self.fix_dask_settings() + + def fix_dask_settings(self): + """Fix "standard" dask behaviour for time+space testing. + + Currently this is single-threaded mode, with known chunksize, + which is optimised for space saving so we can test largest data. + + """ + import dask.config as dcfg + + # Use single-threaded, to avoid process-switching costs and minimise memory usage. + # N.B. generally may be slower, but use less memory ? 
+ dcfg.set(scheduler="single-threaded") + # Configure iris._lazy_data.as_lazy_data to aim for 100Mb chunks + dcfg.set({"array.chunk-size": "128Mib"}) + + def recombine(self): + # A handy general shorthand for the main "combine" operation. + result = recombine_submeshes( + self.full_mesh_cube, + self.region_cubes, + index_coord_name="i_mesh_face", + ) + return result + + +class CombineRegionsCreateCube(MixinCombineRegions): + """Time+memory costs of creating a combined-regions cube. + + The result is lazy, and we don't do the actual calculation. + + """ + + def setup(self, n_cubesphere): + # In this case only, do *not* create the result cube. + # That is the operation we want to test. + super().setup(n_cubesphere, create_result_cube=False) + + def time_create_combined_cube(self, n_cubesphere): + self.recombine() + + def tracemalloc_create_combined_cube(self, n_cubesphere): + self.recombine() + + +class CombineRegionsComputeRealData(MixinCombineRegions): + """Time+memory costs of computing combined-regions data.""" + + def time_compute_data(self, n_cubesphere): + _ = self.recombined_cube.data + + def tracemalloc_compute_data(self, n_cubesphere): + _ = self.recombined_cube.data + + +class CombineRegionsSaveData(MixinCombineRegions): + """Test saving *only*. + + Test saving *only*, having replaced the input cube data with 'imaginary' + array data, so that input data is not loaded from disk during the save + operation. + + + """ + + def time_save(self, n_cubesphere): + # Save to disk, which must compute data + stream it to file. + save(self.recombined_cube, "tmp.nc") + + def tracemalloc_save(self, n_cubesphere): + save(self.recombined_cube, "tmp.nc") + + def track_filesize_saved(self, n_cubesphere): + save(self.recombined_cube, "tmp.nc") + return os.path.getsize("tmp.nc") * 1.0e-6 + + +CombineRegionsSaveData.track_filesize_saved.unit = "Mb" # type: ignore[attr-defined] + + +class CombineRegionsFileStreamedCalc(MixinCombineRegions): + """Test the whole cost of file-to-file streaming. + + Uses the combined cube which is based on lazy data loading from the region + cubes on disk. + """ + + def setup(self, n_cubesphere): + # In this case only, do *not* replace the loaded regions data with + # 'imaginary' data, as we want to test file-to-file calculation+save. + super().setup(n_cubesphere, imaginary_data=False) + + def time_stream_file2file(self, n_cubesphere): + # Save to disk, which must compute data + stream it to file. + save(self.recombined_cube, "tmp.nc") + + def tracemalloc_stream_file2file(self, n_cubesphere): + save(self.recombined_cube, "tmp.nc") diff --git a/benchmarks/benchmarks_iris/plot.py b/benchmarks/benchmarks_iris/plot.py new file mode 100644 index 00000000000..e8fbb5372d7 --- /dev/null +++ b/benchmarks/benchmarks_iris/plot.py @@ -0,0 +1,34 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Plot benchmark tests.""" + +import matplotlib as mpl +import numpy as np + +from iris import coords, cube, plot + +mpl.use("agg") + + +class AuxSort: + def setup(self): + # Manufacture data from which contours can be derived. + # Should generate 10 distinct contours, regardless of dim size. 
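+        # e.g. dim_size=200 -> values 0..1999, each repeated 20 times, then
+        # reshaped to a (200, 200) grid.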
+ dim_size = 200 + repeat_number = int(dim_size / 10) + repeat_range = range(int((dim_size**2) / repeat_number)) + data = np.repeat(repeat_range, repeat_number) + data = data.reshape((dim_size,) * 2) + + # These benchmarks are from a user perspective, so setting up a + # user-level case that will prompt the calling of aux_coords.sort in plot.py. + dim_coord = coords.DimCoord(np.arange(dim_size)) + local_cube = cube.Cube(data) + local_cube.add_aux_coord(dim_coord, 0) + self.cube = local_cube + + def time_aux_sort(self): + # Contour plot arbitrarily picked. Known to prompt aux_coords.sort. + plot.contour(self.cube) diff --git a/benchmarks/benchmarks_iris/regridding.py b/benchmarks/benchmarks_iris/regridding.py new file mode 100644 index 00000000000..e227da0ec69 --- /dev/null +++ b/benchmarks/benchmarks_iris/regridding.py @@ -0,0 +1,119 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Regridding benchmark test.""" + +# import iris tests first so that some things can be initialised before +# importing anything else +from iris import tests # isort:skip + +import numpy as np + +import iris +from iris.analysis import AreaWeighted, PointInCell +from iris.coords import AuxCoord + + +class HorizontalChunkedRegridding: + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) + self.cube = iris.load_cube(cube_file_path) + + # Prepare a tougher cube and chunk it + chunked_cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.chunked_cube = iris.load_cube(chunked_cube_file_path) + + # Chunked data makes the regridder run repeatedly + self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = iris.load_cube(template_file_path) + + # Prepare a regridding scheme + self.scheme_area_w = AreaWeighted() + + def time_regrid_area_w(self) -> None: + # Regrid the cube onto the template. 
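+ # (With lazy source data the regridded cube is itself still lazy, so
+ #  realising .data afterwards is what triggers the actual computation.)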
+ out = self.cube.regrid(self.template_cube, self.scheme_area_w) + # Realise the data + out.data + + def time_regrid_area_w_new_grid(self) -> None: + # Regrid the chunked cube + out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w) + # Realise data + out.data + + def tracemalloc_regrid_area_w(self) -> None: + # Regrid the chunked cube + out = self.cube.regrid(self.template_cube, self.scheme_area_w) + # Realise data + out.data + + tracemalloc_regrid_area_w.number = 3 # type: ignore[attr-defined] + + def tracemalloc_regrid_area_w_new_grid(self) -> None: + # Regrid the chunked cube + out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w) + # Realise data + out.data + + tracemalloc_regrid_area_w_new_grid.number = 3 # type: ignore[attr-defined] + + +class CurvilinearRegridding: + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) + self.cube = iris.load_cube(cube_file_path) + + # Make the source cube curvilinear + x_coord = self.cube.coord("longitude") + y_coord = self.cube.coord("latitude") + xx, yy = np.meshgrid(x_coord.points, y_coord.points) + self.cube.remove_coord(x_coord) + self.cube.remove_coord(y_coord) + x_coord_2d = AuxCoord( + xx, + standard_name=x_coord.standard_name, + units=x_coord.units, + coord_system=x_coord.coord_system, + ) + y_coord_2d = AuxCoord( + yy, + standard_name=y_coord.standard_name, + units=y_coord.units, + coord_system=y_coord.coord_system, + ) + self.cube.add_aux_coord(x_coord_2d, (1, 2)) + self.cube.add_aux_coord(y_coord_2d, (1, 2)) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = iris.load_cube(template_file_path) + + # Prepare a regridding scheme + self.scheme_pic = PointInCell() + + def time_regrid_pic(self) -> None: + # Regrid the cube onto the template. + out = self.cube.regrid(self.template_cube, self.scheme_pic) + # Realise the data + out.data + + def tracemalloc_regrid_pic(self) -> None: + # Regrid the cube onto the template. + out = self.cube.regrid(self.template_cube, self.scheme_pic) + # Realise the data + out.data + + tracemalloc_regrid_pic.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/save.py b/benchmarks/benchmarks_iris/save.py new file mode 100644 index 00000000000..4bac1b14505 --- /dev/null +++ b/benchmarks/benchmarks_iris/save.py @@ -0,0 +1,43 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""File saving benchmarks.""" + +from iris import save +from iris.mesh import save_mesh + +from .generate_data.ugrid import make_cube_like_2d_cubesphere + + +class NetcdfSave: + params = [[50, 600], [False, True]] + param_names = ["cubesphere-N", "is_unstructured"] + + def setup(self, n_cubesphere, is_unstructured): + self.cube = make_cube_like_2d_cubesphere( + n_cube=n_cubesphere, with_mesh=is_unstructured + ) + + def _save_data(self, cube, do_copy=True): + if do_copy: + # Copy the cube, to avoid distorting the results by changing it + # Because we known that older Iris code realises lazy coords + cube = cube.copy() + save(cube, "tmp.nc") + + def _save_mesh(self, cube): + # In this case, we are happy that the mesh is *not* modified + save_mesh(cube.mesh, "mesh.nc") + + def time_netcdf_save_cube(self, n_cubesphere, is_unstructured): + self._save_data(self.cube) + + def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured): + if is_unstructured: + self._save_mesh(self.cube) + + def tracemalloc_netcdf_save(self, n_cubesphere, is_unstructured): + # Don't need to copy the cube here since track_ benchmarks don't + # do repeats between self.setup() calls. + self._save_data(self.cube, do_copy=False) diff --git a/benchmarks/benchmarks_iris/sperf/__init__.py b/benchmarks/benchmarks_iris/sperf/__init__.py new file mode 100644 index 00000000000..2b8b508fd57 --- /dev/null +++ b/benchmarks/benchmarks_iris/sperf/__init__.py @@ -0,0 +1,38 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project. + +SPerf = assessing performance against a series of increasingly large LFRic +datasets. +""" + +from iris import load_cube + +from ..generate_data.ugrid import make_cubesphere_testfile + + +class FileMixin: + """For use in any benchmark classes that work on a file.""" + + # Allows time for large file generation. + timeout = 3600.0 + # Largest file with these params: ~90GB. + # Total disk space: ~410GB. + params = [ + [12, 384, 640, 960, 1280, 1668], + [1, 36, 72], + [1, 3, 10], + ] + param_names = ["cubesphere_C", "N levels", "N time steps"] + # cubesphere_C: notation refers to faces per panel. + # e.g. C1 is 6 faces, 8 nodes + + def setup(self, c_size, n_levels, n_times): + self.file_path = make_cubesphere_testfile( + c_size=c_size, n_levels=n_levels, n_times=n_times + ) + + def load_cube(self): + return load_cube(str(self.file_path)) diff --git a/benchmarks/benchmarks_iris/sperf/combine_regions.py b/benchmarks/benchmarks_iris/sperf/combine_regions.py new file mode 100644 index 00000000000..591b7bb9bea --- /dev/null +++ b/benchmarks/benchmarks_iris/sperf/combine_regions.py @@ -0,0 +1,234 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Region combine benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" + +import os.path + +from dask import array as da +import numpy as np + +from iris import load, load_cube, save +from iris.mesh.utils import recombine_submeshes + +from .. import on_demand_benchmark +from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere + + +class Mixin: + # Characterise time taken + memory-allocated, for various stages of combine + # operations on cubesphere-like test data. 
+ timeout = 300.0 + params = [100, 200, 300, 500, 1000, 1668] + param_names = ["cubesphere_C"] + # Fix result units for the tracking benchmarks. + unit = "Mb" + temp_save_path = BENCHMARK_DATA / "tmp.nc" + + def _parametrised_cache_filename(self, n_cubesphere, content_name): + return BENCHMARK_DATA / f"cube_C{n_cubesphere}_{content_name}.nc" + + def _make_region_cubes(self, full_mesh_cube): + """Make a fixed number of region cubes from a full meshcube.""" + # Divide the cube into regions. + n_faces = full_mesh_cube.shape[-1] + # Start with a simple list of face indices + # first extend to multiple of 5 + n_faces_5s = 5 * ((n_faces + 1) // 5) + i_faces = np.arange(n_faces_5s, dtype=int) + # reshape (5N,) to (N, 5) + i_faces = i_faces.reshape((n_faces_5s // 5, 5)) + # reorder [2, 3, 4, 0, 1] within each block of 5 + i_faces = np.concatenate([i_faces[:, 2:], i_faces[:, :2]], axis=1) + # flatten to get [2 3 4 0 1 (-) 8 9 10 6 7 (-) 13 14 15 11 12 ...] + i_faces = i_faces.flatten() + # reduce back to original length, wrap any overflows into valid range + i_faces = i_faces[:n_faces] % n_faces + + # Divide into regions -- always slightly uneven, since 7 doesn't divide + n_regions = 7 + n_facesperregion = n_faces // n_regions + i_face_regions = (i_faces // n_facesperregion) % n_regions + region_inds = [ + np.where(i_face_regions == i_region)[0] for i_region in range(n_regions) + ] + # NOTE: this produces 7 regions, with near-adjacent value ranges but + # with some points "moved" to an adjacent region. + # Also, region-0 is bigger (because of not dividing by 7). + + # Finally, make region cubes with these indices. + region_cubes = [full_mesh_cube[..., inds] for inds in region_inds] + return region_cubes + + def setup_cache(self): + """Cache all the necessary source data on disk.""" + # Control dask, to minimise memory usage + allow largest data. + self.fix_dask_settings() + + for n_cubesphere in self.params: + # Do for each parameter, since "setup_cache" is NOT parametrised + mesh_cube = make_cube_like_2d_cubesphere( + n_cube=n_cubesphere, with_mesh=True + ) + # Save to files which include the parameter in the names. + save( + mesh_cube, + self._parametrised_cache_filename(n_cubesphere, "meshcube"), + ) + region_cubes = self._make_region_cubes(mesh_cube) + save( + region_cubes, + self._parametrised_cache_filename(n_cubesphere, "regioncubes"), + ) + + def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=True): + """Combine tests "standard" setup operation. + + Load the source cubes (full-mesh + region) from disk. + These are specific to the cubesize parameter. + The data is cached on disk rather than calculated, to avoid any + pre-loading of the process memory allocation. + + If 'imaginary_data' is set (default), the region cubes data is replaced + with lazy data in the form of a da.zeros(). Otherwise, the region data + is lazy data from the files. + + If 'create_result_cube' is set, create "self.combined_cube" containing + the (still lazy) result. + + NOTE: various test classes override + extend this. + + """ + # Load source cubes (full-mesh and regions) + self.full_mesh_cube = load_cube( + self._parametrised_cache_filename(n_cubesphere, "meshcube") + ) + self.region_cubes = load( + self._parametrised_cache_filename(n_cubesphere, "regioncubes") + ) + + # Remove all var-names from loaded cubes, which can otherwise cause + # problems. Also implement 'imaginary' data. 
+ for cube in self.region_cubes + [self.full_mesh_cube]: + cube.var_name = None + for coord in cube.coords(): + coord.var_name = None + if imaginary_data: + # Replace cube data (lazy file data) with 'imaginary' data. + # This has the same lazy-array attributes, but is allocated by + # creating chunks on demand instead of loading from file. + data = cube.lazy_data() + data = da.zeros(data.shape, dtype=data.dtype, chunks=data.chunksize) + cube.data = data + + if create_result_cube: + self.recombined_cube = self.recombine() + + # Fix dask usage mode for all the subsequent performance tests. + self.fix_dask_settings() + + def teardown(self, _): + self.temp_save_path.unlink(missing_ok=True) + + def fix_dask_settings(self): + """Fix "standard" dask behaviour for time+space testing. + + Currently this is single-threaded mode, with known chunksize, + which is optimised for space saving so we can test largest data. + + """ + import dask.config as dcfg + + # Use single-threaded, to avoid process-switching costs and minimise memory usage. + # N.B. generally may be slower, but use less memory ? + dcfg.set(scheduler="single-threaded") + # Configure iris._lazy_data.as_lazy_data to aim for 100Mb chunks + dcfg.set({"array.chunk-size": "128Mib"}) + + def recombine(self): + # A handy general shorthand for the main "combine" operation. + result = recombine_submeshes( + self.full_mesh_cube, + self.region_cubes, + index_coord_name="i_mesh_face", + ) + return result + + def save_recombined_cube(self): + save(self.recombined_cube, self.temp_save_path) + + +@on_demand_benchmark +class CreateCube(Mixin): + """Time+memory costs of creating a combined-regions cube. + + The result is lazy, and we don't do the actual calculation. + + """ + + def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=False): + # In this case only, do *not* create the result cube. + # That is the operation we want to test. + super().setup(n_cubesphere, imaginary_data, create_result_cube) + + def time_create_combined_cube(self, n_cubesphere): + self.recombine() + + def tracemalloc_create_combined_cube(self, n_cubesphere): + self.recombine() + + +@on_demand_benchmark +class ComputeRealData(Mixin): + """Time+memory costs of computing combined-regions data.""" + + def time_compute_data(self, n_cubesphere): + _ = self.recombined_cube.data + + def tracemalloc_compute_data(self, n_cubesphere): + _ = self.recombined_cube.data + + +@on_demand_benchmark +class SaveData(Mixin): + """Test saving *only*. + + Test saving *only*, having replaced the input cube data with 'imaginary' + array data, so that input data is not loaded from disk during the save + operation. + + """ + + def time_save(self, n_cubesphere): + # Save to disk, which must compute data + stream it to file. + self.save_recombined_cube() + + def tracemalloc_save(self, n_cubesphere): + self.save_recombined_cube() + + def track_filesize_saved(self, n_cubesphere): + self.save_recombined_cube() + return self.temp_save_path.stat().st_size * 1.0e-6 + + +@on_demand_benchmark +class FileStreamedCalc(Mixin): + """Test the whole cost of file-to-file streaming. + + Uses the combined cube which is based on lazy data loading from the region + cubes on disk. + + """ + + def setup(self, n_cubesphere, imaginary_data=False, create_result_cube=True): + # In this case only, do *not* replace the loaded regions data with + # 'imaginary' data, as we want to test file-to-file calculation+save. 
+ super().setup(n_cubesphere, imaginary_data, create_result_cube) + + def time_stream_file2file(self, n_cubesphere): + # Save to disk, which must compute data + stream it to file. + self.save_recombined_cube() + + def tracemalloc_stream_file2file(self, n_cubesphere): + self.save_recombined_cube() diff --git a/benchmarks/benchmarks_iris/sperf/equality.py b/benchmarks/benchmarks_iris/sperf/equality.py new file mode 100644 index 00000000000..ddee90cd283 --- /dev/null +++ b/benchmarks/benchmarks_iris/sperf/equality.py @@ -0,0 +1,35 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Equality benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" + +from .. import on_demand_benchmark +from . import FileMixin + + +@on_demand_benchmark +class CubeEquality(FileMixin): + r"""Benchmark time and memory costs. + + Benchmark time and memory costs of comparing :class:`~iris.cube.Cube`\\ s + with attached :class:`~iris.mesh.MeshXY`\\ es. + + Uses :class:`FileMixin` as the realistic case will be comparing + :class:`~iris.cube.Cube`\\ s that have been loaded from file. + + """ + + # Cut down paremt parameters. + params = [FileMixin.params[0]] + + def setup(self, c_size, n_levels=1, n_times=1): + super().setup(c_size, n_levels, n_times) + self.cube = self.load_cube() + self.other_cube = self.load_cube() + + def peakmem_eq(self, n_cube): + _ = self.cube == self.other_cube + + def time_eq(self, n_cube): + _ = self.cube == self.other_cube diff --git a/benchmarks/benchmarks_iris/sperf/load.py b/benchmarks/benchmarks_iris/sperf/load.py new file mode 100644 index 00000000000..d304a30c827 --- /dev/null +++ b/benchmarks/benchmarks_iris/sperf/load.py @@ -0,0 +1,27 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""File loading benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" + +from .. import on_demand_benchmark +from . import FileMixin + + +@on_demand_benchmark +class Load(FileMixin): + def time_load_cube(self, _, __, ___): + _ = self.load_cube() + + +@on_demand_benchmark +class Realise(FileMixin): + def setup(self, c_size, n_levels, n_times): + super().setup(c_size, n_levels, n_times) + self.loaded_cube = self.load_cube() + + def time_realise_cube(self, _, __, ___): + # Don't touch loaded_cube.data - permanent realisation plays badly with + # ASV's re-run strategy. + assert self.loaded_cube.has_lazy_data() + self.loaded_cube.core_data().compute() diff --git a/benchmarks/benchmarks_iris/sperf/save.py b/benchmarks/benchmarks_iris/sperf/save.py new file mode 100644 index 00000000000..a715ec24240 --- /dev/null +++ b/benchmarks/benchmarks_iris/sperf/save.py @@ -0,0 +1,50 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""File saving benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" + +import os.path + +from iris import save +from iris.mesh import save_mesh + +from .. 
import on_demand_benchmark +from ..generate_data.ugrid import make_cube_like_2d_cubesphere + + +@on_demand_benchmark +class NetcdfSave: + """Benchmark time and memory costs of saving ~large-ish data cubes to netcdf.""" + + params = [[1, 100, 200, 300, 500, 1000, 1668], [False, True]] + param_names = ["cubesphere_C", "is_unstructured"] + # Fix result units for the tracking benchmarks. + unit = "Mb" + + def setup(self, n_cubesphere, is_unstructured): + self.cube = make_cube_like_2d_cubesphere( + n_cube=n_cubesphere, with_mesh=is_unstructured + ) + + def _save_cube(self, cube): + save(cube, "tmp.nc") + + def _save_mesh(self, cube): + save_mesh(cube.mesh, "mesh.nc") + + def time_save_cube(self, n_cubesphere, is_unstructured): + self._save_cube(self.cube) + + def tracemalloc_save_cube(self, n_cubesphere, is_unstructured): + self._save_cube(self.cube) + + def time_save_mesh(self, n_cubesphere, is_unstructured): + if is_unstructured: + self._save_mesh(self.cube) + + # The filesizes make a good reference point for the 'addedmem' memory + # usage results. + def track_filesize_save_cube(self, n_cubesphere, is_unstructured): + self._save_cube(self.cube) + return os.path.getsize("tmp.nc") * 1.0e-6 diff --git a/benchmarks/benchmarks_iris/stats.py b/benchmarks/benchmarks_iris/stats.py new file mode 100644 index 00000000000..fbab12cd4b2 --- /dev/null +++ b/benchmarks/benchmarks_iris/stats.py @@ -0,0 +1,52 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Stats benchmark tests.""" + +import iris +from iris.analysis.stats import pearsonr +import iris.tests + + +class PearsonR: + def setup(self): + cube_temp = iris.load_cube( + iris.tests.get_data_path( + ("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc") + ) + ) + + # Make data non-lazy. + cube_temp.data + + self.cube_a = cube_temp[:6] + self.cube_b = cube_temp[20:26] + self.cube_b.replace_coord(self.cube_a.coord("time")) + for name in ["latitude", "longitude"]: + self.cube_b.coord(name).guess_bounds() + self.weights = iris.analysis.cartography.area_weights(self.cube_b) + + def time_real(self): + pearsonr(self.cube_a, self.cube_b, weights=self.weights) + + def tracemalloc_real(self): + pearsonr(self.cube_a, self.cube_b, weights=self.weights) + + tracemalloc_real.number = 3 # type: ignore[attr-defined] + + def time_lazy(self): + for cube in self.cube_a, self.cube_b: + cube.data = cube.lazy_data() + + result = pearsonr(self.cube_a, self.cube_b, weights=self.weights) + result.data + + def tracemalloc_lazy(self): + for cube in self.cube_a, self.cube_b: + cube.data = cube.lazy_data() + + result = pearsonr(self.cube_a, self.cube_b, weights=self.weights) + result.data + + tracemalloc_lazy.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/trajectory.py b/benchmarks/benchmarks_iris/trajectory.py new file mode 100644 index 00000000000..77825ef2f2b --- /dev/null +++ b/benchmarks/benchmarks_iris/trajectory.py @@ -0,0 +1,56 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Trajectory benchmark test.""" + +# import iris tests first so that some things can be initialised before +# importing anything else +from iris import tests # isort:skip + +import numpy as np + +import iris +from iris.analysis.trajectory import interpolate + + +class TrajectoryInterpolation: + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) + self.cube = iris.load_cube(cube_file_path) + + trajectory = np.array([np.array((-50 + i, -50 + i)) for i in range(100)]) + self.sample_points = [ + ("longitude", trajectory[:, 0]), + ("latitude", trajectory[:, 1]), + ] + + def time_trajectory_linear(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="linear") + # Realise the data + out_cube.data + + def tracemalloc_trajectory_linear(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="linear") + # Realise the data + out_cube.data + + tracemalloc_trajectory_linear.number = 3 # type: ignore[attr-defined] + + def time_trajectory_nearest(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="nearest") + # Realise the data + out_cube.data + + def tracemalloc_trajectory_nearest(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="nearest") + # Realise the data + out_cube.data + + tracemalloc_trajectory_nearest.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/unit_style/__init__disabled.py b/benchmarks/benchmarks_iris/unit_style/__init__disabled.py new file mode 100644 index 00000000000..d7f84c2b919 --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/__init__disabled.py @@ -0,0 +1,16 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Small-scope benchmarks that can help with performance investigations. + +By renaming ``__init__.py`` these are all disabled by default: + +- They bloat benchmark run-time. +- They are too vulnerable to 'noise' due to their small scope - small objects, + short operations - they report a lot of false positive regressions. +- We rely on the wider-scope integration-style benchmarks to flag performance + changes, upon which we expect to do some manual investigation - these + smaller benchmarks can be run then. + +""" diff --git a/benchmarks/benchmarks_iris/unit_style/aux_factory.py b/benchmarks/benchmarks_iris/unit_style/aux_factory.py new file mode 100644 index 00000000000..329a2b0bdaf --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/aux_factory.py @@ -0,0 +1,52 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Small-scope AuxFactory benchmark tests.""" + +import numpy as np + +from iris import aux_factory, coords + + +class FactoryCommon: + # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: + # * make class an ABC + # * remove NotImplementedError + # * combine setup_common into setup + """Run a generalised suite of benchmarks for any factory. + + A base class running a generalised suite of benchmarks for any factory. + Factory to be specified in a subclass. 
+ + ASV will run the benchmarks within this class for any subclasses. + + Should only be instantiated within subclasses, but cannot enforce this + since ASV cannot handle classes that include abstract methods. + """ + + def setup(self): + """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" + raise NotImplementedError + + def setup_common(self): + """Shared setup code that can be called by subclasses.""" + self.factory = self.create() + + def time_create(self): + """Create an instance of the benchmarked factory. + + Create method is specified in the subclass. + """ + self.create() + + +class HybridHeightFactory(FactoryCommon): + def setup(self): + data_1d = np.zeros(1000) + self.coord = coords.AuxCoord(points=data_1d, units="m") + + self.setup_common() + + def create(self): + return aux_factory.HybridHeightFactory(delta=self.coord) diff --git a/benchmarks/benchmarks_iris/unit_style/coords.py b/benchmarks/benchmarks_iris/unit_style/coords.py new file mode 100644 index 00000000000..704746f190a --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/coords.py @@ -0,0 +1,129 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Small-scope Coord benchmark tests.""" + +import numpy as np + +from iris import coords + +from .. import disable_repeat_between_setup + + +def setup(): + """General variables needed by multiple benchmark classes.""" + global data_1d + + data_1d = np.zeros(1000) + + +class CoordCommon: + # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: + # * make class an ABC + # * remove NotImplementedError + # * combine setup_common into setup + """Run a generalised suite of benchmarks for any coord. + + A base class running a generalised suite of benchmarks for any coord. + Coord to be specified in a subclass. + + ASV will run the benchmarks within this class for any subclasses. + + Should only be instantiated within subclasses, but cannot enforce this + since ASV cannot handle classes that include abstract methods. + """ + + def setup(self): + """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" + raise NotImplementedError + + def setup_common(self): + """Shared setup code that can be called by subclasses.""" + self.component = self.create() + + def time_create(self): + """Create an instance of the benchmarked factory. + + Create method is specified in the subclass. 
+ """ + self.create() + + +class DimCoord(CoordCommon): + def setup(self): + point_values = np.arange(1000) + bounds = np.array([point_values - 1, point_values + 1]).transpose() + + self.create_kwargs = { + "points": point_values, + "bounds": bounds, + "units": "days since 1970-01-01", + "climatological": True, + } + + self.setup_common() + + def create(self): + return coords.DimCoord(**self.create_kwargs) + + def time_regular(self): + coords.DimCoord.from_regular(0, 1, 1000) + + +class AuxCoord(CoordCommon): + def setup(self): + bounds = np.array([data_1d - 1, data_1d + 1]).transpose() + + self.create_kwargs = { + "points": data_1d, + "bounds": bounds, + "units": "days since 1970-01-01", + "climatological": True, + } + + self.setup_common() + + def create(self): + return coords.AuxCoord(**self.create_kwargs) + + def time_points(self): + _ = self.component.points + + def time_bounds(self): + _ = self.component.bounds + + +@disable_repeat_between_setup +class AuxCoordLazy(AuxCoord): + """Lazy equivalent of :class:`AuxCoord`.""" + + def setup(self): + super().setup() + self.create_kwargs["points"] = self.component.lazy_points() + self.create_kwargs["bounds"] = self.component.lazy_bounds() + self.setup_common() + + +class CellMeasure(CoordCommon): + def setup(self): + self.setup_common() + + def create(self): + return coords.CellMeasure(data_1d) + + +class CellMethod(CoordCommon): + def setup(self): + self.setup_common() + + def create(self): + return coords.CellMethod("test") + + +class AncillaryVariable(CoordCommon): + def setup(self): + self.setup_common() + + def create(self): + return coords.AncillaryVariable(data_1d) diff --git a/benchmarks/benchmarks_iris/unit_style/cube.py b/benchmarks/benchmarks_iris/unit_style/cube.py new file mode 100644 index 00000000000..780418aa148 --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/cube.py @@ -0,0 +1,252 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Small-scope Cube benchmark tests.""" + +import numpy as np + +from iris import analysis, aux_factory, coords, cube + +from .. import disable_repeat_between_setup +from ..generate_data.stock import sample_meshcoord + + +def setup(*params): + """General variables needed by multiple benchmark classes.""" + global data_1d + global data_2d + global general_cube + + data_2d = np.zeros((1000,) * 2) + data_1d = data_2d[0] + general_cube = cube.Cube(data_2d) + + +class ComponentCommon: + # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: + # * make class an ABC + # * remove NotImplementedError + # * combine setup_common into setup + """Run a generalised suite of benchmarks for cubes. + + A base class running a generalised suite of benchmarks for cubes that + include a specified component (e.g. Coord, CellMeasure etc.). Component to + be specified in a subclass. + + ASV will run the benchmarks within this class for any subclasses. + + Should only be instantiated within subclasses, but cannot enforce this + since ASV cannot handle classes that include abstract methods. + """ + + def setup(self): + """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" + raise NotImplementedError + + def create(self): + """Create a cube (generic). + + cube_kwargs allow dynamic inclusion of different components; + specified in subclasses. 
+ """ + return cube.Cube(data=data_2d, **self.cube_kwargs) + + def setup_common(self): + """Shared setup code that can be called by subclasses.""" + self.cube = self.create() + + def time_create(self): + """Create a cube that includes an instance of the benchmarked component.""" + self.create() + + def time_add(self): + """Add an instance of the benchmarked component to an existing cube.""" + # Unable to create the copy during setup since this needs to be re-done + # for every repeat of the test (some components disallow duplicates). + general_cube_copy = general_cube.copy(data=data_2d) + self.add_method(general_cube_copy, *self.add_args) + + +class Cube: + def time_basic(self): + cube.Cube(data_2d) + + def time_rename(self): + general_cube.name = "air_temperature" + + +class AuxCoord(ComponentCommon): + def setup(self): + self.coord_name = "test" + coord_bounds = np.array([data_1d - 1, data_1d + 1]).transpose() + aux_coord = coords.AuxCoord( + long_name=self.coord_name, + points=data_1d, + bounds=coord_bounds, + units="days since 1970-01-01", + climatological=True, + ) + + # Variables needed by the ComponentCommon base class. + self.cube_kwargs = {"aux_coords_and_dims": [(aux_coord, 0)]} + self.add_method = cube.Cube.add_aux_coord + self.add_args = (aux_coord, (0)) + + self.setup_common() + + def time_return_coords(self): + self.cube.coords() + + def time_return_coord_dims(self): + self.cube.coord_dims(self.coord_name) + + +class AuxFactory(ComponentCommon): + def setup(self): + coord = coords.AuxCoord(points=data_1d, units="m") + self.hybrid_factory = aux_factory.HybridHeightFactory(delta=coord) + + # Variables needed by the ComponentCommon base class. + self.cube_kwargs = { + "aux_coords_and_dims": [(coord, 0)], + "aux_factories": [self.hybrid_factory], + } + + self.setup_common() + + # Variables needed by the overridden time_add benchmark in this subclass. + cube_w_coord = self.cube.copy() + [cube_w_coord.remove_aux_factory(i) for i in cube_w_coord.aux_factories] + self.cube_w_coord = cube_w_coord + + def time_add(self): + # Requires override from super().time_add because the cube needs an + # additional coord. + self.cube_w_coord.add_aux_factory(self.hybrid_factory) + + +class CellMeasure(ComponentCommon): + def setup(self): + cell_measure = coords.CellMeasure(data_1d) + + # Variables needed by the ComponentCommon base class. + self.cube_kwargs = {"cell_measures_and_dims": [(cell_measure, 0)]} + self.add_method = cube.Cube.add_cell_measure + self.add_args = (cell_measure, 0) + + self.setup_common() + + +class CellMethod(ComponentCommon): + def setup(self): + cell_method = coords.CellMethod("test") + + # Variables needed by the ComponentCommon base class. + self.cube_kwargs = {"cell_methods": [cell_method]} + self.add_method = cube.Cube.add_cell_method + self.add_args = [cell_method] + + self.setup_common() + + +class AncillaryVariable(ComponentCommon): + def setup(self): + ancillary_variable = coords.AncillaryVariable(data_1d) + + # Variables needed by the ComponentCommon base class. 
+ self.cube_kwargs = {"ancillary_variables_and_dims": [(ancillary_variable, 0)]} + self.add_method = cube.Cube.add_ancillary_variable + self.add_args = (ancillary_variable, 0) + + self.setup_common() + + +class MeshCoord: + params = [ + 6, # minimal cube-sphere + int(1e6), # realistic cube-sphere size + 1000, # To match size in :class:`AuxCoord` + ] + param_names = ["number of faces"] + + def setup(self, n_faces): + mesh_kwargs = dict(n_nodes=n_faces + 2, n_edges=n_faces * 2, n_faces=n_faces) + + self.mesh_coord = sample_meshcoord(sample_mesh_kwargs=mesh_kwargs) + self.data = np.zeros(n_faces) + self.cube_blank = cube.Cube(data=self.data) + self.cube = self.create() + + def create(self): + return cube.Cube(data=self.data, aux_coords_and_dims=[(self.mesh_coord, 0)]) + + def time_create(self, n_faces): + _ = self.create() + + @disable_repeat_between_setup + def time_add(self, n_faces): + self.cube_blank.add_aux_coord(self.mesh_coord, 0) + + @disable_repeat_between_setup + def time_remove(self, n_faces): + self.cube.remove_coord(self.mesh_coord) + + +class Merge: + def setup(self): + self.cube_list = cube.CubeList() + for i in np.arange(2): + i_cube = general_cube.copy() + i_coord = coords.AuxCoord([i]) + i_cube.add_aux_coord(i_coord) + self.cube_list.append(i_cube) + + def time_merge(self): + self.cube_list.merge() + + +class Concatenate: + def setup(self): + dim_size = 1000 + self.cube_list = cube.CubeList() + for i in np.arange(dim_size * 2, step=dim_size): + i_cube = general_cube.copy() + i_coord = coords.DimCoord(np.arange(dim_size) + (i * dim_size)) + i_cube.add_dim_coord(i_coord, 0) + self.cube_list.append(i_cube) + + def time_concatenate(self): + self.cube_list.concatenate() + + +class Equality: + def setup(self): + self.cube_a = general_cube.copy() + self.cube_b = general_cube.copy() + + aux_coord = coords.AuxCoord(data_1d) + self.cube_a.add_aux_coord(aux_coord, 0) + self.cube_b.add_aux_coord(aux_coord, 1) + + def time_equality(self): + self.cube_a == self.cube_b + + +class Aggregation: + def setup(self): + repeat_number = 10 + repeat_range = range(int(1000 / repeat_number)) + array_repeat = np.repeat(repeat_range, repeat_number) + array_unique = np.arange(len(array_repeat)) + + coord_repeat = coords.AuxCoord(points=array_repeat, long_name="repeat") + coord_unique = coords.DimCoord(points=array_unique, long_name="unique") + + local_cube = general_cube.copy() + local_cube.add_aux_coord(coord_repeat, 0) + local_cube.add_dim_coord(coord_unique, 0) + self.cube = local_cube + + def time_aggregated_by(self): + self.cube.aggregated_by("repeat", analysis.MEAN) diff --git a/benchmarks/benchmarks_iris/unit_style/mesh.py b/benchmarks/benchmarks_iris/unit_style/mesh.py new file mode 100644 index 00000000000..ed3aad14285 --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/mesh.py @@ -0,0 +1,187 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Benchmark tests for the iris.mesh module.""" + +from copy import deepcopy + +import numpy as np + +from iris import mesh + +from .. import disable_repeat_between_setup +from ..generate_data.stock import sample_mesh + + +class UGridCommon: + """Run a generalised suite of benchmarks for any mesh object. + + A base class running a generalised suite of benchmarks for any mesh object. + Object to be specified in a subclass. + + ASV will run the benchmarks within this class for any subclasses. 
+ + ASV will not benchmark this class as setup() triggers a NotImplementedError. + (ASV has not yet released ABC/abstractmethod support - asv#838). + + """ + + params = [ + 6, # minimal cube-sphere + int(1e6), # realistic cube-sphere size + ] + param_names = ["number of faces"] + + def setup(self, *params): + self.object = self.create() + + def create(self): + raise NotImplementedError + + def time_create(self, *params): + """Create an instance of the benchmarked object. + + create() method is specified in the subclass. + """ + self.create() + + +class Connectivity(UGridCommon): + def setup(self, n_faces): + self.array = np.zeros([n_faces, 3], dtype=int) + super().setup(n_faces) + + def create(self): + return mesh.Connectivity(indices=self.array, cf_role="face_node_connectivity") + + def time_indices(self, n_faces): + _ = self.object.indices + + def time_location_lengths(self, n_faces): + # Proofed against the Connectivity name change (633ed17). + if getattr(self.object, "src_lengths", False): + meth = self.object.src_lengths + else: + meth = self.object.location_lengths + _ = meth() + + def time_validate_indices(self, n_faces): + self.object.validate_indices() + + +@disable_repeat_between_setup +class ConnectivityLazy(Connectivity): + """Lazy equivalent of :class:`Connectivity`.""" + + def setup(self, n_faces): + super().setup(n_faces) + self.array = self.object.lazy_indices() + self.object = self.create() + + +class MeshXY(UGridCommon): + def setup(self, n_faces, lazy=False): + #### + # Steal everything from the sample mesh for benchmarking creation of a + # brand new mesh. + source_mesh = sample_mesh( + n_nodes=n_faces + 2, + n_edges=n_faces * 2, + n_faces=n_faces, + lazy_values=lazy, + ) + + def get_coords_and_axes(location): + return [ + (source_mesh.coord(axis=axis, location=location), axis) + for axis in ("x", "y") + ] + + self.mesh_kwargs = dict( + topology_dimension=source_mesh.topology_dimension, + node_coords_and_axes=get_coords_and_axes("node"), + connectivities=source_mesh.connectivities(), + edge_coords_and_axes=get_coords_and_axes("edge"), + face_coords_and_axes=get_coords_and_axes("face"), + ) + #### + + super().setup(n_faces) + + self.face_node = self.object.face_node_connectivity + self.node_x = self.object.node_coords.node_x + # Kwargs for reuse in search and remove methods. + self.connectivities_kwarg = dict(cf_role="edge_node_connectivity") + self.coords_kwarg = dict(location="face") + + # TODO: an opportunity for speeding up runtime if needed, since + # eq_object is not needed for all benchmarks. Just don't generate it + # within a benchmark - the execution time is large enough that it + # could be a significant portion of the benchmark - makes regressions + # smaller and could even pick up regressions in copying instead! 
+ self.eq_object = deepcopy(self.object) + + def create(self): + return mesh.MeshXY(**self.mesh_kwargs) + + def time_add_connectivities(self, n_faces): + self.object.add_connectivities(self.face_node) + + def time_add_coords(self, n_faces): + self.object.add_coords(node_x=self.node_x) + + def time_connectivities(self, n_faces): + _ = self.object.connectivities(**self.connectivities_kwarg) + + def time_coords(self, n_faces): + _ = self.object.coords(**self.coords_kwarg) + + def time_eq(self, n_faces): + _ = self.object == self.eq_object + + def time_remove_connectivities(self, n_faces): + self.object.remove_connectivities(**self.connectivities_kwarg) + + def time_remove_coords(self, n_faces): + self.object.remove_coords(**self.coords_kwarg) + + +@disable_repeat_between_setup +class MeshXYLazy(MeshXY): + """Lazy equivalent of :class:`MeshXY`.""" + + def setup(self, n_faces, lazy=True): + super().setup(n_faces, lazy=lazy) + + +class MeshCoord(UGridCommon): + # Add extra parameter value to match AuxCoord benchmarking. + params = UGridCommon.params + [1000] + + def setup(self, n_faces, lazy=False): + self.mesh = sample_mesh( + n_nodes=n_faces + 2, + n_edges=n_faces * 2, + n_faces=n_faces, + lazy_values=lazy, + ) + + super().setup(n_faces) + + def create(self): + return mesh.MeshCoord(mesh=self.mesh, location="face", axis="x") + + def time_points(self, n_faces): + _ = self.object.points + + def time_bounds(self, n_faces): + _ = self.object.bounds + + +@disable_repeat_between_setup +class MeshCoordLazy(MeshCoord): + """Lazy equivalent of :class:`MeshCoord`.""" + + def setup(self, n_faces, lazy=True): + super().setup(n_faces, lazy=lazy) diff --git a/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py b/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py new file mode 100644 index 00000000000..0af055fa820 --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py @@ -0,0 +1,83 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Small-scope metadata manager factory benchmark tests.""" + +from iris.common import ( + AncillaryVariableMetadata, + BaseMetadata, + CellMeasureMetadata, + CoordMetadata, + CubeMetadata, + DimCoordMetadata, + metadata_manager_factory, +) + + +class MetadataManagerFactory__create: + params = [1, 10, 100] + + def time_AncillaryVariableMetadata(self, n): + [metadata_manager_factory(AncillaryVariableMetadata) for _ in range(n)] + + def time_BaseMetadata(self, n): + [metadata_manager_factory(BaseMetadata) for _ in range(n)] + + def time_CellMeasureMetadata(self, n): + [metadata_manager_factory(CellMeasureMetadata) for _ in range(n)] + + def time_CoordMetadata(self, n): + [metadata_manager_factory(CoordMetadata) for _ in range(n)] + + def time_CubeMetadata(self, n): + [metadata_manager_factory(CubeMetadata) for _ in range(n)] + + def time_DimCoordMetadata(self, n): + [metadata_manager_factory(DimCoordMetadata) for _ in range(n)] + + +class MetadataManagerFactory: + def setup(self): + self.ancillary = metadata_manager_factory(AncillaryVariableMetadata) + self.base = metadata_manager_factory(BaseMetadata) + self.cell = metadata_manager_factory(CellMeasureMetadata) + self.coord = metadata_manager_factory(CoordMetadata) + self.cube = metadata_manager_factory(CubeMetadata) + self.dim = metadata_manager_factory(DimCoordMetadata) + + def time_AncillaryVariableMetadata_fields(self): + self.ancillary.fields + + def time_AncillaryVariableMetadata_values(self): + self.ancillary.values + + def time_BaseMetadata_fields(self): + self.base.fields + + def time_BaseMetadata_values(self): + self.base.values + + def time_CellMeasuresMetadata_fields(self): + self.cell.fields + + def time_CellMeasuresMetadata_values(self): + self.cell.values + + def time_CoordMetadata_fields(self): + self.coord.fields + + def time_CoordMetadata_values(self): + self.coord.values + + def time_CubeMetadata_fields(self): + self.cube.fields + + def time_CubeMetadata_values(self): + self.cube.values + + def time_DimCoordMetadata_fields(self): + self.dim.fields + + def time_DimCoordMetadata_values(self): + self.dim.values diff --git a/benchmarks/benchmarks_iris/unit_style/mixin.py b/benchmarks/benchmarks_iris/unit_style/mixin.py new file mode 100644 index 00000000000..92de5e7ad95 --- /dev/null +++ b/benchmarks/benchmarks_iris/unit_style/mixin.py @@ -0,0 +1,78 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Small-scope CFVariableMixin benchmark tests.""" + +import numpy as np + +from iris import coords +from iris.common.metadata import AncillaryVariableMetadata + +LONG_NAME = "air temperature" +STANDARD_NAME = "air_temperature" +VAR_NAME = "air_temp" +UNITS = "degrees" +ATTRIBUTES = dict(a=1) +DICT = dict( + standard_name=STANDARD_NAME, + long_name=LONG_NAME, + var_name=VAR_NAME, + units=UNITS, + attributes=ATTRIBUTES, +) +METADATA = AncillaryVariableMetadata(**DICT) +TUPLE = tuple(DICT.values()) + + +class CFVariableMixin: + def setup(self): + data_1d = np.zeros(1000) + + # These benchmarks are from a user perspective, so using a user-level + # subclass of CFVariableMixin to test behaviour. AncillaryVariable is + # the simplest so using that. 
+ self.cfm_proxy = coords.AncillaryVariable(data_1d) + self.cfm_proxy.long_name = "test" + + def time_get_long_name(self): + self.cfm_proxy.long_name + + def time_set_long_name(self): + self.cfm_proxy.long_name = LONG_NAME + + def time_get_standard_name(self): + self.cfm_proxy.standard_name + + def time_set_standard_name(self): + self.cfm_proxy.standard_name = STANDARD_NAME + + def time_get_var_name(self): + self.cfm_proxy.var_name + + def time_set_var_name(self): + self.cfm_proxy.var_name = VAR_NAME + + def time_get_units(self): + self.cfm_proxy.units + + def time_set_units(self): + self.cfm_proxy.units = UNITS + + def time_get_attributes(self): + self.cfm_proxy.attributes + + def time_set_attributes(self): + self.cfm_proxy.attributes = ATTRIBUTES + + def time_get_metadata(self): + self.cfm_proxy.metadata + + def time_set_metadata__dict(self): + self.cfm_proxy.metadata = DICT + + def time_set_metadata__tuple(self): + self.cfm_proxy.metadata = TUPLE + + def time_set_metadata__metadata(self): + self.cfm_proxy.metadata = METADATA diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py new file mode 100644 index 00000000000..dc2e174f52c --- /dev/null +++ b/benchmarks/bm_runner.py @@ -0,0 +1,739 @@ +#!/usr/bin/env python3 +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Argparse conveniences for executing common types of benchmark runs.""" + +from abc import ABC, abstractmethod +import argparse +from datetime import datetime +from importlib import import_module +from os import environ +from pathlib import Path +import re +import shlex +import subprocess +from tempfile import NamedTemporaryFile +from textwrap import dedent +from typing import Literal, Protocol + +# The threshold beyond which shifts are 'notable'. See `asv compare`` docs +# for more. +COMPARE_FACTOR = 1.2 + +BENCHMARKS_DIR = Path(__file__).parent +ROOT_DIR = BENCHMARKS_DIR.parent +# Storage location for reports used in GitHub actions. +GH_REPORT_DIR = ROOT_DIR.joinpath(".github", "workflows", "benchmark_reports") + +# Common ASV arguments for all run_types except `custom`. +ASV_HARNESS = "run {posargs} --attribute rounds=3 --interleave-rounds --show-stderr" + + +def echo(echo_string: str): + # Use subprocess for printing to reduce chance of printing out of sequence + # with the subsequent calls. + subprocess.run(["echo", f"BM_RUNNER DEBUG: {echo_string}"]) + + +def _subprocess_runner(args, asv=False, **kwargs): + # Avoid permanent modifications if the same arguments are used more than once. + args = args.copy() + kwargs = kwargs.copy() + if asv: + args.insert(0, "asv") + kwargs["cwd"] = BENCHMARKS_DIR + echo(" ".join(args)) + kwargs.setdefault("check", True) + return subprocess.run(args, **kwargs) + + +def _subprocess_runner_capture(args, **kwargs) -> str: + result = _subprocess_runner(args, capture_output=True, **kwargs) + return result.stdout.decode().rstrip() + + +def _check_requirements(package: str) -> None: + try: + import_module(package) + except ImportError as exc: + message = ( + f"No {package} install detected. Benchmarks can only " + f"be run in an environment including {package}." 
+ ) + raise Exception(message) from exc + + +def _prep_data_gen_env() -> None: + """Create or access a separate, unchanging environment for generating test data.""" + python_version = "3.13" + data_gen_var = "DATA_GEN_PYTHON" + if data_gen_var in environ: + echo("Using existing data generation environment.") + else: + echo("Setting up the data generation environment ...") + # Get Nox to build an environment for the `tests` session, but don't + # run the session. Will reuse a cached environment if appropriate. + _subprocess_runner( + [ + "nox", + f"--noxfile={ROOT_DIR / 'noxfile.py'}", + "--session=tests", + "--install-only", + f"--python={python_version}", + ] + ) + # Find the environment built above, set it to be the data generation + # environment. + env_directory: Path = next((ROOT_DIR / ".nox").rglob(f"tests*")) + data_gen_python = (env_directory / "bin" / "python").resolve() + environ[data_gen_var] = str(data_gen_python) + + def clone_resource(name: str, clone_source: str) -> Path: + resource_dir = data_gen_python.parents[1] / "resources" + resource_dir.mkdir(exist_ok=True) + clone_dir = resource_dir / name + if not clone_dir.is_dir(): + _subprocess_runner(["git", "clone", clone_source, str(clone_dir)]) + return clone_dir + + echo("Installing Mule into data generation environment ...") + mule_dir = clone_resource("mule", "https://github.com/metomi/mule.git") + _subprocess_runner( + [ + str(data_gen_python), + "-m", + "pip", + "install", + str(mule_dir / "mule"), + ] + ) + + test_data_var = "OVERRIDE_TEST_DATA_REPOSITORY" + if test_data_var not in environ: + echo("Installing iris-test-data into data generation environment ...") + test_data_dir = clone_resource( + "iris-test-data", "https://github.com/SciTools/iris-test-data.git" + ) + environ[test_data_var] = str(test_data_dir / "test_data") + + echo("Data generation environment ready.") + + +def _setup_common() -> None: + _check_requirements("asv") + _check_requirements("nox") + + _prep_data_gen_env() + + echo("Setting up ASV ...") + _subprocess_runner(["machine", "--yes"], asv=True) + + echo("Setup complete.") + + +def _asv_compare( + *commits: str, + overnight_mode: bool = False, + fail_on_regression: bool = False, +) -> None: + """Run through a list of commits comparing each one to the next.""" + commits = tuple(commit[:8] for commit in commits) + + machine_script = [ + "from asv.machine import Machine", + "print(Machine.get_unique_machine_name())", + ] + machine_name = _subprocess_runner_capture( + ["python", "-c", ";".join(machine_script)] + ) + + for i in range(len(commits) - 1): + before = commits[i] + after = commits[i + 1] + asv_command = shlex.split( + f"compare {before} {after} " + f"--machine {machine_name} --factor={COMPARE_FACTOR} --split" + ) + + comparison = _subprocess_runner_capture(asv_command, asv=True) + echo(comparison) + shifts = _subprocess_runner_capture([*asv_command, "--only-changed"], asv=True) + + if shifts or (not overnight_mode): + # For the overnight run: only post if there are shifts. + _gh_create_reports(after, comparison, shifts) + + if shifts and fail_on_regression: + # fail_on_regression supports setups that expect CI failures. + message = ( + f"Performance shifts detected between commits {before} and {after}.\n" + ) + raise RuntimeError(message) + + +def _gh_create_reports(commit_sha: str, results_full: str, results_shifts: str) -> None: + """If running under GitHub Actions: record the results in report(s). 
+ + Posting the reports is done by :func:`_gh_post_reports`, which must be run + within a separate action to comply with GHA's security limitations. + """ + if "GITHUB_ACTIONS" not in environ: + # Only run when within GHA. + return + + pr_number = environ.get("PR_NUMBER", None) + on_pull_request = pr_number is not None + run_id = environ["GITHUB_RUN_ID"] + repo = environ["GITHUB_REPOSITORY"] + gha_run_link = f"[`{run_id}`](https://github.com/{repo}/actions/runs/{run_id})" + + GH_REPORT_DIR.mkdir(exist_ok=True) + commit_dir = GH_REPORT_DIR / commit_sha + commit_dir.mkdir() + command_path = commit_dir / "command.txt" + body_path = commit_dir / "body.txt" + + performance_report = dedent( + ( + """ + # :stopwatch: Performance Benchmark Report: {commit_sha} + +
+ <details>
+ <summary>Performance shifts</summary>
+
+ ```
+ {results_shifts}
+ ```
+
+ </details>
+
+ <details>
+ <summary>Full benchmark results</summary>
+
+ ```
+ {results_full}
+ ```
+
+ </details>
+ + Generated by GHA run {gha_run_link} + """ + ) + ) + performance_report = performance_report.format( + commit_sha=commit_sha, + results_shifts=results_shifts, + results_full=results_full, + gha_run_link=gha_run_link, + ) + + if on_pull_request: + # Command to post the report as a comment on the active PR. + body_path.write_text(performance_report) + command = ( + f"gh pr comment {pr_number} " + f"--body-file {body_path.absolute()} " + f"--repo {repo}" + ) + command_path.write_text(command) + + else: + # Command to post the report as new issue. + commit_msg = _subprocess_runner_capture( + f"git log {commit_sha}^! --oneline".split(" ") + ) + # Intended for benchmarking commits on trunk - should include a PR + # number due to our squash policy. + pr_tag_match = re.search("#[0-9]*", commit_msg) + + assignee = "" + pr_tag = "pull request number unavailable" + if pr_tag_match is not None: + pr_tag = pr_tag_match.group(0) + + for login_type in ("author", "mergedBy"): + gh_query = f'.["{login_type}"]["login"]' + commandlist = shlex.split( + f"gh pr view {pr_tag[1:]} " + f"--json {login_type} -q '{gh_query}' " + f"--repo {repo}" + ) + login = _subprocess_runner_capture(commandlist) + + commandlist = [ + "curl", + "-s", + f"https://api.github.com/users/{login}", + ] + login_info = _subprocess_runner_capture(commandlist) + is_user = '"type": "User"' in login_info + if is_user: + assignee = login + break + + title = f"Performance Shift(s): `{commit_sha}`" + body = dedent( + ( + f""" + Benchmark comparison has identified performance shifts at: + + * commit {commit_sha} ({pr_tag}). + +

+ <details>
+ <summary>
+ Please review the report below and
+ take corrective/congratulatory action as appropriate
+ :slightly_smiling_face:
+ </summary>
+ """ + ) + ) + body += performance_report + body_path.write_text(body) + + command = ( + "gh issue create " + f'--title "{title}" ' + f"--body-file {body_path.absolute()} " + '--label "Bot" ' + '--label "Type: Performance" ' + f"--repo {repo}" + ) + if assignee: + command += f" --assignee {assignee}" + command_path.write_text(command) + + +def _gh_post_reports() -> None: + """If running under GitHub Actions: post pre-prepared benchmark reports. + + Reports are prepared by :func:`_gh_create_reports`, which must be run + within a separate action to comply with GHA's security limitations. + """ + if "GITHUB_ACTIONS" not in environ: + # Only run when within GHA. + return + + commit_dirs = [x for x in GH_REPORT_DIR.iterdir() if x.is_dir()] + for commit_dir in commit_dirs: + command_path = commit_dir / "command.txt" + command = command_path.read_text() + + # Security: only accept certain commands to run. + assert command.startswith(("gh issue create", "gh pr comment")) + + _subprocess_runner(shlex.split(command)) + + +class _SubParserGenerator(ABC): + """Convenience for holding all the necessary argparse info in 1 place.""" + + name: str = NotImplemented + description: str = NotImplemented + epilog: str = NotImplemented + + class _SubParsersType(Protocol): + """Duck typing since argparse._SubParsersAction is private.""" + + def add_parser(self, name, **kwargs) -> argparse.ArgumentParser: ... + + def __init__(self, subparsers: _SubParsersType) -> None: + self.subparser = subparsers.add_parser( + self.name, + description=self.description, + epilog=self.epilog, + formatter_class=argparse.RawTextHelpFormatter, + ) + self.add_arguments() + self.add_asv_arguments() + self.subparser.set_defaults(func=self.func) + + @abstractmethod + def add_arguments(self) -> None: + """All custom self.subparser.add_argument() calls.""" + _ = NotImplemented + + def add_asv_arguments(self) -> None: + self.subparser.add_argument( + "asv_args", + nargs=argparse.REMAINDER, + help="Any number of arguments to pass down to the ASV benchmark command.", + ) + + @staticmethod + @abstractmethod + def func(args: argparse.Namespace): + """Return when the subparser is parsed. + + `func` is then called, performing the user's selected sub-command. + + """ + _ = args + return NotImplemented + + +class Overnight(_SubParserGenerator): + name = "overnight" + description = ( + "Benchmarks all commits between the input **first_commit** to ``HEAD``, " + "comparing each to its parent for performance shifts. If running on " + "GitHub Actions: performance shift(s) will be reported in a new issue.\n" + "Designed for checking the previous 24 hours' commits, typically in a " + "scheduled script.\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py overnight a1b23d4\n" + "e.g. python bm_runner.py overnight a1b23d4 --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "first_commit", + type=str, + help="The first commit in the benchmarking commit sequence.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + commit_range = f"{args.first_commit}^^.." + # git rev-list --first-parent is the command ASV uses. 
+ git_command = shlex.split(f"git rev-list --first-parent {commit_range}") + commit_string = _subprocess_runner_capture(git_command) + commit_list = commit_string.split("\n") + + asv_command = shlex.split(ASV_HARNESS.format(posargs=commit_range)) + try: + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + finally: + # Designed for long running - want to compare/post any valid + # results even if some are broken. + _asv_compare(*reversed(commit_list), overnight_mode=True) + + +class Branch(_SubParserGenerator): + name = "branch" + description = ( + "Performs the same operations as ``overnight``, but always on two " + "commits only - ``HEAD``, and ``HEAD``'s merge-base with the input " + "**base_branch**.\n" + "If running on GitHub Actions: HEAD will be GitHub's " + "merge commit and merge-base will be the merge target. Performance " + "comparisons will be posted in a comment on the relevant pull request.\n" + "Designed for testing if the active branch's changes cause performance " + "shifts - anticipating what would be caught by ``overnight`` once " + "merged.\n\n" + "**For maximum accuracy, avoid using the machine that is running this " + "session. Run time could be >1 hour for the full benchmark suite.**\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py branch upstream/main\n" + "e.g. python bm_runner.py branch upstream/main --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "base_branch", + type=str, + help="A branch that has the merge-base with ``HEAD`` - ``HEAD`` will be benchmarked against that merge-base.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + git_command = shlex.split("git rev-parse HEAD") + head_sha = _subprocess_runner_capture(git_command)[:8] + + git_command = shlex.split(f"git merge-base {head_sha} {args.base_branch}") + merge_base = _subprocess_runner_capture(git_command)[:8] + + with NamedTemporaryFile("w") as hashfile: + hashfile.writelines([merge_base, "\n", head_sha]) + hashfile.flush() + commit_range = f"HASHFILE:{hashfile.name}" + asv_command = shlex.split(ASV_HARNESS.format(posargs=commit_range)) + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + + _asv_compare(merge_base, head_sha) + + +class _CSPerf(_SubParserGenerator, ABC): + """Common code used by both CPerf and SPerf.""" + + description = ( + "Run the on-demand {} suite of benchmarks (part of the UK Met " + "Office NG-VAT project) for the ``HEAD`` of ``upstream/main`` only, " + "and publish the results to the input **publish_dir**, within a " + "unique subdirectory for this run.\n" + "Uses `asv run`." + ) + epilog = ( + "e.g. python bm_runner.py {0} my_publish_dir\n" + "e.g. python bm_runner.py {0} my_publish_dir --bench=regridding" + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "publish_dir", + type=str, + help="HTML results will be published to a sub-dir in this dir.", + ) + + @staticmethod + def csperf(args: argparse.Namespace, run_type: Literal["cperf", "sperf"]) -> None: + _setup_common() + + publish_dir = Path(args.publish_dir) + if not publish_dir.is_dir(): + message = f"Input 'publish directory' is not a directory: {publish_dir}" + raise NotADirectoryError(message) + publish_subdir = ( + publish_dir / f"{run_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + ) + publish_subdir.mkdir() + + # Activate on demand benchmarks (C/SPerf are deactivated for + # 'standard' runs). 
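+ # (Assumption: the benchmark suite reads this environment variable - e.g. in
+ # benchmarks_iris/__init__.py - and skips the C/SPerf classes when it is unset.)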
+ environ["ON_DEMAND_BENCHMARKS"] = "True" + commit_range = "upstream/main^!" + + asv_command_str = ( + ASV_HARNESS.format(posargs=commit_range) + f" --bench={run_type}" + ) + + # Only do a single round. + asv_command = shlex.split(re.sub(r"rounds=\d", "rounds=1", asv_command_str)) + try: + _subprocess_runner([*asv_command, *args.asv_args], asv=True) + except subprocess.CalledProcessError as err: + # C/SPerf benchmarks are much bigger than the CI ones: + # Don't fail the whole run if memory blows on 1 benchmark. + # ASV produces return code of 2 if the run includes crashes. + if err.returncode != 2: + raise + + asv_command = shlex.split(f"publish {commit_range} --html-dir={publish_subdir}") + _subprocess_runner(asv_command, asv=True) + + # Print completion message. + location = BENCHMARKS_DIR / ".asv" + echo( + f'New ASV results for "{run_type}".\n' + f'See "{publish_subdir}",' + f'\n or JSON files under "{location / "results"}".' + ) + + +class CPerf(_CSPerf): + name = "cperf" + description = _CSPerf.description.format("CPerf") + epilog = _CSPerf.epilog.format("cperf") + + @staticmethod + def func(args: argparse.Namespace) -> None: + _CSPerf.csperf(args, "cperf") + + +class SPerf(_CSPerf): + name = "sperf" + description = _CSPerf.description.format("SPerf") + epilog = _CSPerf.epilog.format("sperf") + + @staticmethod + def func(args: argparse.Namespace) -> None: + _CSPerf.csperf(args, "sperf") + + +class Custom(_SubParserGenerator): + name = "custom" + description = ( + "Run ASV with the input **ASV sub-command**, without any preset " + "arguments - must all be supplied by the user. So just like running " + "ASV manually, with the convenience of re-using the runner's " + "scripted setup steps." + ) + epilog = "e.g. python bm_runner.py custom continuous a1b23d4 HEAD --quick" + + def add_arguments(self) -> None: + self.subparser.add_argument( + "asv_sub_command", + type=str, + help="The ASV command to run.", + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + _subprocess_runner([args.asv_sub_command, *args.asv_args], asv=True) + + +class TrialRun(_SubParserGenerator): + name = "trialrun" + description = ( + "Fast trial-run a given benchmark, to check it works : " + "in a provided or latest-lockfile environment, " + "with no repeats for accuracy of measurement." + ) + epilog = ( + "e.g. python bm_runner.py trialrun " + "MyBenchmarks.time_calc ${DATA_GEN_PYTHON}" + "\n\nNOTE: 'runpath' also replaces $DATA_GEN_PYTHON during the run." + ) + + def add_arguments(self) -> None: + self.subparser.add_argument( + "benchmark", + type=str, + help=( + "A benchmark name, possibly including wildcards, " + "as supported by the ASV '--bench' argument." + ), + ) + self.subparser.add_argument( + "runpath", + type=str, + help=( + "A path to an existing python executable, " + "to completely bypass environment building." + ), + ) + + @staticmethod + def func(args: argparse.Namespace) -> None: + if args.runpath: + # Shortcut creation of a data-gen environment + # - which is also the trial-run env. 
+ python_path = Path(args.runpath).resolve() + environ["DATA_GEN_PYTHON"] = str(python_path) + _setup_common() + # get path of data-gen environment, setup by previous call + python_path = Path(environ["DATA_GEN_PYTHON"]) + # allow 'on-demand' benchmarks + environ["ON_DEMAND_BENCHMARKS"] = "1" + asv_command = [ + "run", + "--bench", + args.benchmark, + # no repeats for timing accuracy + "--quick", + "--show-stderr", + # do not build a unique env : run test in data-gen environment + "--environment", + f"existing:{python_path}", + ] + args.asv_args + _subprocess_runner(asv_command, asv=True) + + +class Validate(_SubParserGenerator): + name = "validate" + description = ( + "Quickly check that the benchmark architecture works as intended with " + "the current codebase. Things that are checked: env creation/update, " + "package build/install/uninstall, artificial data creation." + ) + epilog = "Sole acceptable syntax: python bm_runner.py validate" + + @staticmethod + def func(args: argparse.Namespace) -> None: + _setup_common() + + git_command = shlex.split("git rev-parse HEAD") + head_sha = _subprocess_runner_capture(git_command)[:8] + + # Find the most recent commit where the lock-files are not + # identical to HEAD - will force environment updates. + locks_dir = Path(__file__).parents[1] / "requirements" / "locks" + assert locks_dir.is_dir() + git_command = shlex.split( + f"git log -1 --pretty=format:%P -- {locks_dir.resolve()}" + ) + locks_sha = _subprocess_runner_capture(git_command)[:8] + + with NamedTemporaryFile("w") as hashfile: + hashfile.writelines([locks_sha, "\n", head_sha]) + hashfile.flush() + asv_command = shlex.split( + f"run HASHFILE:{hashfile.name} --bench ValidateSetup " + "--attribute rounds=1 --show-stderr" + ) + extra_env = environ | {"ON_DEMAND_BENCHMARKS": "1"} + _subprocess_runner(asv_command, asv=True, env=extra_env) + + # No arguments permitted for this subclass: + + def add_arguments(self) -> None: + pass + + def add_asv_arguments(self) -> None: + pass + + +class GhPost(_SubParserGenerator): + name = "_gh_post" + description = ( + "Used by GitHub Actions to post benchmark reports that were prepared " + "during previous actions. Separated to comply with GitHub's security " + "requirements." + ) + epilog = "Sole acceptable syntax: python bm_runner.py _gh_post" + + @staticmethod + def func(args: argparse.Namespace) -> None: + _gh_post_reports() + + # No arguments permitted for this subclass: + + def add_arguments(self) -> None: + pass + + def add_asv_arguments(self) -> None: + pass + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Run the repository performance benchmarks (using Airspeed Velocity)." + ), + epilog=( + "More help is available within each sub-command." + "\n\nNOTE(1): a separate python environment is created to " + "construct test files.\n Set $DATA_GEN_PYTHON to avoid the cost " + "of this." + "\nNOTE(2): iris-test-data is downloaded and cached within the " + "data generation environment.\n Set " + "$OVERRIDE_TEST_DATA_REPOSITORY to avoid the cost of this." + "\nNOTE(3): test data is cached within the " + "benchmarks code directory, and uses a lot of disk space " + "of disk space (Gb).\n Set $BENCHMARK_DATA to specify where this " + "space can be safely allocated." + ), + formatter_class=argparse.RawTextHelpFormatter, + ) + subparsers = parser.add_subparsers(required=True) + + parser_generators: tuple[type[_SubParserGenerator], ...] 
= ( + Overnight, + Branch, + CPerf, + SPerf, + Custom, + TrialRun, + Validate, + GhPost, + ) + + for gen in parser_generators: + _ = gen(subparsers).subparser + + parsed = parser.parse_args() + parsed.func(parsed) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/custom_bms/README.md b/benchmarks/custom_bms/README.md new file mode 100644 index 00000000000..eea85d74fe9 --- /dev/null +++ b/benchmarks/custom_bms/README.md @@ -0,0 +1,11 @@ +# Iris custom benchmarks + +To be recognised by ASV, these benchmarks must be packaged and installed in +line with the +[ASV guidelines](https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html). +This is achieved using the custom build in [install.py](./install.py). + +Installation is into the environment where the benchmarks are run (i.e. not +the environment containing ASV + Nox, but the one built to the same +specifications as the Tests environment). This is done via `build_command` +in [asv.conf.json](../asv.conf.json). diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py new file mode 100644 index 00000000000..bda9f1cc3cd --- /dev/null +++ b/benchmarks/custom_bms/install.py @@ -0,0 +1,55 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Install the SciTools custom benchmarks for detection by ASV. + +See the requirements for being detected as an ASV plugin: +https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html +""" + +from pathlib import Path +import shutil +from subprocess import run +from tempfile import TemporaryDirectory + +this_dir = Path(__file__).parent + + +def package_files(new_dir: Path) -> None: + """Package SciTools' custom benchmarks for detection by ASV. + + Parameters + ---------- + new_dir : Path + The directory to package the custom benchmarks in. + """ + asv_bench_scitools = new_dir / "asv_bench_scitools" + benchmarks = asv_bench_scitools / "benchmarks" + benchmarks.mkdir(parents=True) + (asv_bench_scitools / "__init__.py").touch() + + for py_file in this_dir.glob("*.py"): + if py_file != Path(__file__): + shutil.copy2(py_file, benchmarks) + + # Create this on the fly, as having multiple pyproject.toml files in 1 + # project causes problems. + py_project = new_dir / "pyproject.toml" + py_project.write_text( + """ + [project] + name = "asv_bench_scitools" + version = "0.1" + """ + ) + + +def main(): + with TemporaryDirectory() as temp_dir: + package_files(Path(temp_dir)) + run(["python", "-m", "pip", "install", temp_dir]) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py new file mode 100644 index 00000000000..486c67aeb99 --- /dev/null +++ b/benchmarks/custom_bms/tracemallocbench.py @@ -0,0 +1,196 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. + +"""Benchmark for growth in process resident memory, repeating for accuracy. + +Uses a modified version of the repeat logic in +:class:`asv_runner.benchmarks.time.TimeBenchmark`. 
+""" + +import re +from timeit import Timer +import tracemalloc +from typing import Callable + +from asv_runner.benchmarks.time import TimeBenchmark, wall_timer + + +class TracemallocBenchmark(TimeBenchmark): + """Benchmark for growth in process resident memory, repeating for accuracy. + + Obviously limited as to what it actually measures : Relies on the current + process not having significant unused (de-allocated) memory when the + tested codeblock runs, and only reliable when the code allocates a + significant amount of new memory. + + Benchmark operations prefixed with ``tracemalloc_`` or ``Tracemalloc`` will + use this benchmark class. + + Inherits behaviour from :class:`asv_runner.benchmarks.time.TimeBenchmark`, + with modifications for memory measurement. See the below Attributes section + and https://asv.readthedocs.io/en/stable/writing_benchmarks.html#timing-benchmarks. + + Attributes + ---------- + Mostly identical to :class:`asv_runner.benchmarks.time.TimeBenchmark`. See + https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks + Make sure to use the inherited ``repeat`` attribute if greater accuracy + is needed. Below are the attributes where inherited behaviour is + overridden. + + number : int + The number of times the benchmarked operation will be called per + ``repeat``. Memory growth is measured after ALL calls - + i.e. `number` should make no difference to the result if the operation + has perfect garbage collection. The parent class's intelligent + modification of `number` is NOT inherited. A minimum value of ``1`` is + enforced. + warmup_time, sample_time, min_run_count, timer + Not used. + type : str = "tracemalloc" + The name of this benchmark type. + unit : str = "bytes" + The units of the measured metric (i.e. the growth in memory). + + """ + + name_regex = re.compile("^(Tracemalloc[A-Z_].+)|(tracemalloc_.+)$") + + param: tuple + + def __init__(self, name: str, func: Callable, attr_sources: list) -> None: + """Initialize a new instance of `TracemallocBenchmark`. + + Parameters + ---------- + name : str + The name of the benchmark. + func : callable + The function to benchmark. + attr_sources : list + A list of objects from which to draw attributes. + """ + super().__init__(name, func, attr_sources) + self.type = "tracemalloc" + self.unit = "bytes" + + def _load_vars(self): + """Load benchmark variables from attribute sources. + + Downstream handling of ``number`` is not the same as in the parent, so + need to make sure it is at least 1. + """ + super()._load_vars() + self.number = max(1, self.number) + + def run(self, *param: tuple) -> dict: + """Run the benchmark with the given parameters. + + Downstream handling of ``param`` is not the same as in the parent, so + need to store it now. + + Parameters + ---------- + *param : tuple + The parameters to pass to the benchmark function. + + Returns + ------- + dict + A dictionary with the benchmark results. It contains the samples + taken, and "the number of times the function was called in each + sample" - for this benchmark that is always ``1`` to avoid the + parent class incorrectly modifying the results. + """ + self.param = param + return super().run(*param) + + def benchmark_timing( + self, + timer: Timer, + min_repeat: int, + max_repeat: int, + max_time: float, + warmup_time: float, + number: int, + min_run_count: int, + ) -> tuple[list[int], int]: + """Benchmark the timing of the function execution. 
+ + Heavily modified from the parent method + - Directly performs setup and measurement (parent used timeit). + - `number` used differently (see Parameters). + - No warmup phase. + + Parameters + ---------- + timer : timeit.Timer + Not used. + min_repeat : int + The minimum number of times to repeat the function execution. + max_repeat : int + The maximum number of times to repeat the function execution. + max_time : float + The maximum total time to spend on the benchmarking. + warmup_time : float + Not used. + number : int + The number of times the benchmarked operation will be called per + repeat. Memory growth is measured after ALL calls - i.e. `number` + should make no difference to the result if the operation + has perfect garbage collection. The parent class's intelligent + modification of `number` is NOT inherited. + min_run_count : int + Not used. + + Returns + ------- + list + A list of the measured memory growths, in bytes. + int = 1 + Part of the inherited return signature. Must be 1 to avoid + the parent incorrectly modifying the results. + """ + start_time = wall_timer() + samples: list[int] = [] + + def too_slow(num_samples) -> bool: + """Stop taking samples if limits exceeded. + + Parameters + ---------- + num_samples : int + The number of samples taken so far. + + Returns + ------- + bool + True if the benchmark should stop, False otherwise. + """ + if num_samples < min_repeat: + return False + return wall_timer() > start_time + max_time + + # Collect samples + while len(samples) < max_repeat: + self.redo_setup() + tracemalloc.start() + for _ in range(number): + __ = self.func(*self.param) + _, peak_mem_bytes = tracemalloc.get_traced_memory() + tracemalloc.stop() + + samples.append(peak_mem_bytes) + + if too_slow(len(samples)): + break + + # ``number`` is not used in the same way as in the parent class. Must + # be returned as 1 to avoid parent incorrectly modifying the results. 
+ return samples, 1 + + +# https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html +export_as_benchmark = [TracemallocBenchmark] From a1a70af27ffb55459eb87c8099841be8e967e23d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:24:41 +0200 Subject: [PATCH 02/22] Try Iris workflows --- .github/workflows/benchmarks-last-release.yml | 80 ------------------- .github/workflows/benchmarks.yml | 77 ------------------ .github/workflows/benchmarks_report.yml | 4 +- 3 files changed, 2 insertions(+), 159 deletions(-) delete mode 100644 .github/workflows/benchmarks-last-release.yml delete mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks-last-release.yml b/.github/workflows/benchmarks-last-release.yml deleted file mode 100644 index bf3f5de480f..00000000000 --- a/.github/workflows/benchmarks-last-release.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Benchmark compare last release - -on: - push: - branches: - - main - workflow_dispatch: - -jobs: - benchmark: - name: Linux - runs-on: ubuntu-latest - env: - ASV_DIR: "./asv_bench" - CONDA_ENV_FILE: ci/requirements/environment.yml - - steps: - # We need the full repo to avoid this issue - # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Set up conda environment - uses: mamba-org/setup-micromamba@v2 - with: - micromamba-version: "1.5.10-0" - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-tests - cache-environment: true - cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" - create-args: >- - asv - - - name: "Get Previous tag" - id: previoustag - uses: "WyriHaximus/github-action-get-previous-tag@v1" - # with: - # fallback: 1.0.0 # Optional fallback tag to use when no tag can be found - - - name: Run benchmarks - shell: bash -l {0} - id: benchmark - env: - OPENBLAS_NUM_THREADS: 1 - MKL_NUM_THREADS: 1 - OMP_NUM_THREADS: 1 - ASV_FACTOR: 1.5 - ASV_SKIP_SLOW: 1 - run: | - set -x - # ID this runner - asv machine --yes - echo "Baseline: ${{ steps.previoustag.outputs.tag }} " - echo "Contender: ${{ github.sha }}" - # Use mamba for env creation - # export CONDA_EXE=$(which mamba) - export CONDA_EXE=$(which conda) - # Run benchmarks for current commit against base - ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" - asv continuous $ASV_OPTIONS ${{ steps.previoustag.outputs.tag }} ${{ github.sha }} \ - | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ - | tee benchmarks.log - # Report and export results for subsequent steps - if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then - exit 1 - fi - working-directory: ${{ env.ASV_DIR }} - - - name: Add instructions to artifact - if: always() - run: | - cp benchmarks/README_CI.md benchmarks.log .asv/results/ - working-directory: ${{ env.ASV_DIR }} - - - uses: actions/upload-artifact@v4 - if: always() - with: - name: asv-benchmark-results-${{ runner.os }} - path: ${{ env.ASV_DIR }}/.asv/results diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml deleted file mode 100644 index 113e8184f56..00000000000 --- a/.github/workflows/benchmarks.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Benchmark - -on: - pull_request: - types: [opened, reopened, synchronize, labeled] - workflow_dispatch: - -env: - PR_HEAD_LABEL: ${{ 
github.event.pull_request.head.label }} - -jobs: - benchmark: - if: ${{ contains( github.event.pull_request.labels.*.name, 'run-benchmark') && github.event_name == 'pull_request' || contains( github.event.pull_request.labels.*.name, 'topic-performance') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} - name: Linux - runs-on: ubuntu-latest - env: - ASV_DIR: "./asv_bench" - CONDA_ENV_FILE: ci/requirements/environment-benchmark.yml - - steps: - # We need the full repo to avoid this issue - # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Set up conda environment - uses: mamba-org/setup-micromamba@v2 - with: - micromamba-version: "1.5.10-0" - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-benchmark - cache-environment: true - cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" - # add "build" because of https://github.com/airspeed-velocity/asv/issues/1385 - create-args: >- - asv - python-build - mamba<=1.5.10 - - - name: Run benchmarks - shell: bash -l {0} - id: benchmark - env: - OPENBLAS_NUM_THREADS: 1 - MKL_NUM_THREADS: 1 - OMP_NUM_THREADS: 1 - ASV_FACTOR: 1.5 - ASV_SKIP_SLOW: 1 - run: | - set -x - # ID this runner - asv machine --yes - echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})" - echo "Contender: ${GITHUB_SHA} ($PR_HEAD_LABEL)" - # Run benchmarks for current commit against base - ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" - asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \ - | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ - | tee benchmarks.log - # Report and export results for subsequent steps - if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then - exit 1 - fi - working-directory: ${{ env.ASV_DIR }} - - - name: Add instructions to artifact - if: always() - run: | - cp benchmarks/README_CI.md benchmarks.log .asv/results/ - working-directory: ${{ env.ASV_DIR }} - - - uses: actions/upload-artifact@v4 - if: always() - with: - name: asv-benchmark-results-${{ runner.os }} - path: ${{ env.ASV_DIR }}/.asv/results diff --git a/.github/workflows/benchmarks_report.yml b/.github/workflows/benchmarks_report.yml index 1de0f34b4c4..bdaf76e0391 100644 --- a/.github/workflows/benchmarks_report.yml +++ b/.github/workflows/benchmarks_report.yml @@ -20,7 +20,7 @@ jobs: - name: Download artifact id: download-artifact # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#using-data-from-the-triggering-workflow - uses: actions/github-script@v7 + uses: actions/github-script@v8 with: script: | let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ @@ -75,7 +75,7 @@ jobs: - name: Set up Python # benchmarks/bm_runner.py only needs builtins to run. 
- uses: actions/setup-python@v5 + uses: actions/setup-python@v6 - name: Post reports env: From 5ec482a0eb6152e5539c1fd3eb16701e984adf04 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:36:02 +0200 Subject: [PATCH 03/22] Switch repository --- .github/workflows/benchmarks_run.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml index 8b1966c0377..a97bb68d480 100644 --- a/.github/workflows/benchmarks_run.yml +++ b/.github/workflows/benchmarks_run.yml @@ -24,7 +24,7 @@ jobs: # This workflow supports two different scenarios (overnight and branch). # The pre-checks job determines which scenario is being run. runs-on: ubuntu-latest - if: github.repository == 'SciTools/iris' + if: github.repository == 'pydata/xarray' outputs: overnight: ${{ steps.overnight.outputs.check }} branch: ${{ steps.branch.outputs.check }} @@ -135,7 +135,7 @@ jobs: then first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1) fi - + if [ "$first_commit" != "" ] then nox -s benchmarks -- overnight $first_commit From a1e0be00ead0a74be1b56c1ef7be04c0d828dfe2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:36:29 +0200 Subject: [PATCH 04/22] Add xarray typical benchmark labels --- .github/workflows/benchmarks_run.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml index a97bb68d480..d840ca1a4f3 100644 --- a/.github/workflows/benchmarks_run.yml +++ b/.github/workflows/benchmarks_run.yml @@ -50,6 +50,10 @@ jobs: steps.files-changed.outputs.changed == 'true' || github.event.label.name == 'benchmark_this' + || + github.event.label.name == 'run-benchmarks' + || + github.event.label.name == 'topic-performance' ) run: echo "check=true" >> "$GITHUB_OUTPUT" From 5c52b87ab6ba2a72b2aa208aa8c6374ac89dc67e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:42:10 +0200 Subject: [PATCH 05/22] Create noxfile.py --- noxfile.py | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 noxfile.py diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000000..415e4fc3d5f --- /dev/null +++ b/noxfile.py @@ -0,0 +1,291 @@ +"""Perform test automation with nox. + +For further details, see https://nox.thea.codes/en/stable/# + +""" + +import hashlib +import os +from pathlib import Path + +import nox +from nox.logger import logger + +#: Default to reusing any pre-existing nox environments. +nox.options.reuse_existing_virtualenvs = True + +#: Python versions we can run sessions under +_PY_VERSIONS_ALL = ["3.11", "3.12", "3.13"] +_PY_VERSION_LATEST = _PY_VERSIONS_ALL[-1] + +#: One specific python version for docs builds +_PY_VERSION_DOCSBUILD = _PY_VERSION_LATEST + +#: Cirrus-CI environment variable hook. +PY_VER = os.environ.get("PY_VER", _PY_VERSIONS_ALL) + +#: Default cartopy cache directory. +CARTOPY_CACHE_DIR = os.environ.get("HOME") / Path(".local/share/cartopy") + +# https://github.com/numpy/numpy/pull/19478 +# https://github.com/matplotlib/matplotlib/pull/22099 +#: Common session environment variables. 
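+# (Assumed rationale, per the issues linked above: disabling the AVX512 kernels
+# keeps timings comparable across runners with differing CPU feature support.)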
+ENV = dict(NPY_DISABLE_CPU_FEATURES="AVX512F,AVX512CD,AVX512_SKX") + + +def session_lockfile(session: nox.sessions.Session) -> Path: + """Return the path of the session lockfile.""" + return Path(f"requirements/locks/py{session.python.replace('.', '')}-linux-64.lock") + + +def session_cachefile(session: nox.sessions.Session) -> Path: + """Return the path of the session lockfile cache.""" + lockfile = session_lockfile(session) + tmp_dir = Path(session.create_tmp()) + cache = tmp_dir / lockfile.name + return cache + + +def venv_populated(session: nox.sessions.Session) -> bool: + """List of packages in the lockfile installed. + + Returns True if the conda venv has been created. + """ + return session_cachefile(session).is_file() + + +def venv_changed(session: nox.sessions.Session) -> bool: + """Return True if the installed session is different. + + Compares to that specified in the lockfile. + """ + changed = False + cache = session_cachefile(session) + lockfile = session_lockfile(session) + if cache.is_file(): + with open(lockfile, "rb") as fi: + expected = hashlib.sha256(fi.read()).hexdigest() + with open(cache, "r") as fi: + actual = fi.read() + changed = actual != expected + return changed + + +def cache_venv(session: nox.sessions.Session) -> None: + """Cache the nox session environment. + + This consists of saving a hexdigest (sha256) of the associated + conda lock file. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + lockfile = session_lockfile(session) + cache = session_cachefile(session) + with open(lockfile, "rb") as fi: + hexdigest = hashlib.sha256(fi.read()).hexdigest() + with open(cache, "w") as fout: + fout.write(hexdigest) + + +def cache_cartopy(session: nox.sessions.Session) -> None: + """Determine whether to cache the cartopy natural earth shapefiles. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + if not CARTOPY_CACHE_DIR.is_dir(): + session.run_always( + "python", + "-c", + "import cartopy; cartopy.io.shapereader.natural_earth()", + ) + + +def prepare_venv(session: nox.sessions.Session) -> None: + """Create and cache the nox session conda environment. + + Additionally provide conda environment package details and info. + + Note that, iris is installed into the environment using pip. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + Notes + ----- + See + - https://github.com/theacodes/nox/issues/346 + - https://github.com/theacodes/nox/issues/260 + + """ + lockfile = session_lockfile(session) + venv_dir = session.virtualenv.location_name + + if not venv_populated(session): + # environment has been created but packages not yet installed + # populate the environment from the lockfile + logger.debug(f"Populating conda env at {venv_dir}") + session.conda_install("--file", str(lockfile)) + cache_venv(session) + + elif venv_changed(session): + # destroy the environment and rebuild it + logger.debug(f"Lockfile changed. Re-creating conda env at {venv_dir}") + _re_orig = session.virtualenv.reuse_existing + session.virtualenv.reuse_existing = False + session.virtualenv.create() + session.conda_install("--file", str(lockfile)) + session.virtualenv.reuse_existing = _re_orig + cache_venv(session) + + logger.debug(f"Environment {venv_dir} is up to date") + + cache_cartopy(session) + + # Determine whether verbose diagnostics have been requested + # from the command line. 
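+ # e.g. `nox -s tests -- --verbose` triggers the conda diagnostics below.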
+ verbose = "-v" in session.posargs or "--verbose" in session.posargs + + if verbose: + session.run_always("conda", "info") + session.run_always("conda", "list", f"--prefix={venv_dir}") + session.run_always( + "conda", + "list", + f"--prefix={venv_dir}", + "--explicit", + ) + + +@nox.session(python=PY_VER, venv_backend="conda") +def tests(session: nox.sessions.Session): + """Perform iris system, integration and unit tests. + + Coverage testing is enabled if the "--coverage" or "-c" flag is used. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + run_args = [ + "pytest", + "-n", + "auto", + "lib/iris/tests", + ] + if "-c" in session.posargs or "--coverage" in session.posargs: + run_args[-1:-1] = ["--cov=lib/iris", "--cov-report=xml"] + session.run(*run_args) + + +@nox.session(python=_PY_VERSION_DOCSBUILD, venv_backend="conda") +def doctest(session: nox.sessions.Session): + """Perform iris doctests and gallery. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + session.cd("docs") + session.run( + "make", + "clean", + "html", + external=True, + ) + session.run( + "make", + "doctest", + external=True, + ) + + +@nox.session(python=_PY_VERSION_DOCSBUILD, venv_backend="conda") +def gallery(session: nox.sessions.Session): + """Perform iris gallery doc-tests. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.install("--no-deps", "--editable", ".") + session.env.update(ENV) + session.run( + "pytest", + "-n", + "auto", + "docs/gallery_tests", + ) + + +@nox.session(python=PY_VER, venv_backend="conda") +def wheel(session: nox.sessions.Session): + """Perform iris local wheel install and import test. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + prepare_venv(session) + session.cd("dist") + fname = list(Path(".").glob("scitools_iris-*.whl")) + if len(fname) == 0: + raise ValueError("Cannot find wheel to install.") + if len(fname) > 1: + emsg = f"Expected to find 1 wheel to install, found {len(fname)} instead." + raise ValueError(emsg) + session.install(fname[0].name) + session.run( + "python", + "-c", + "import iris; print(f'{iris.__version__=}')", + external=True, + ) + + +@nox.session +def benchmarks(session: nox.sessions.Session): + """Run the Iris benchmark runner. Run session with `-- --help` for help. + + Parameters + ---------- + session : object + A `nox.sessions.Session` object. + + """ + if len(session.posargs) == 0: + message = ( + "This session MUST be run with at least one argument. The " + "arguments are passed down to the benchmark runner script. 
E.g:\n" + "nox -s benchmarks -- --help\n" + "nox -s benchmarks -- something --help\n" + "nox -s benchmarks -- something\n" + ) + session.error(message) + session.install("asv", "nox") + bm_runner_path = Path(__file__).parent / "benchmarks" / "bm_runner.py" + session.run("python", bm_runner_path, *session.posargs) From 67848fc6585b50a05dda40fb579471b4c142ec89 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 20:51:19 +0200 Subject: [PATCH 06/22] Create refresh-lockfiles.yml --- .github/workflows/refresh-lockfiles.yml | 108 ++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 .github/workflows/refresh-lockfiles.yml diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml new file mode 100644 index 00000000000..d5839d81288 --- /dev/null +++ b/.github/workflows/refresh-lockfiles.yml @@ -0,0 +1,108 @@ +# This workflow periodically creates new environment lock files based on the newest +# available packages and dependencies. +# +# Environment specifications are given as conda environment.yml files found in +# `requirements/py**.yml`. These state the packages required, the conda channels +# that the packages will be pulled from, and any versions of packages that need to be +# pinned at specific versions. +# +# For environments that have changed, a pull request will be made and submitted +# to the main branch + +name: Refresh Lockfiles + + +on: + workflow_call: + +jobs: + get_python_matrix: + # Determines which Python versions should be included in the matrix used in + # the gen_lockfiles job. + if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.get_py.outputs.matrix }} + steps: + - uses: actions/checkout@v5 + - id: get_py + run: echo "MATRIX=$(ls -1 requirements/py*.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} + + gen_lockfiles: + # This is a matrix job: it splits to create new lockfiles for each + # of the CI test python versions. + if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + needs: get_python_matrix + + strategy: + matrix: + python: ${{ fromJSON(needs.get_python_matrix.outputs.MATRIX) }} + + steps: + - uses: actions/checkout@v5 + - name: install requirements + run: | + source $CONDA/bin/activate base + conda update -n base --all + - name: generate lockfile + run: | + pipx run conda-lock -k explicit -p linux-64 -f requirements/${{matrix.python}}.yml + mv conda-linux-64.lock ${{matrix.python}}-linux-64.lock + - name: output lockfile + uses: actions/upload-artifact@v4 + with: + name: lock-artifacts-${{matrix.python}} + path: ${{matrix.python}}-linux-64.lock + + create_pr: + # Once the matrix job has completed all the lock files will have been + # uploaded as artifacts. + # Download the artifacts, add them to the repo, and create a PR. 
+ if: "github.repository_owner == 'pydata' || github.event_name == 'workflow_dispatch'" + runs-on: ubuntu-latest + needs: gen_lockfiles + + steps: + - uses: actions/checkout@v5 + - name: get artifacts + uses: actions/download-artifact@v5 + with: + path: ${{ github.workspace }}/requirements/locks + merge-multiple: true + + - name: "Generate token" + uses: actions/create-github-app-token@v2 + id: generate-token + with: + app-id: ${{ secrets.AUTH_APP_ID }} + private-key: ${{ secrets.AUTH_APP_PRIVATE_KEY }} + + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e + with: + token: ${{ steps.generate-token.outputs.token }} + commit-message: Updated environment lockfiles + committer: "Lockfile bot " + author: "Lockfile bot " + delete-branch: true + branch: auto-update-lockfiles + title: "[CI Bot] environment lockfiles auto-update" + body: | + Lockfiles updated to the latest resolvable environment. + ### If the CI tasks fail, create a new branch based on this PR and add the required fixes to that branch. + labels: | + New: Pull Request + Bot + + - name: Check Pull Request + if: steps.cpr.outputs.pull-request-number != '' + run: | + echo "### :rocket: Pull-Request Summary" >> ${GITHUB_STEP_SUMMARY} + echo "" >> ${GITHUB_STEP_SUMMARY} + echo "The following lock-files pull-request has been auto-generated:" + echo "- **PR** #${{ steps.cpr.outputs.pull-request-number }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **URL** ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **Operation** [${{ steps.cpr.outputs.pull-request-operation }}]" >> ${GITHUB_STEP_SUMMARY} + echo "- **SHA** ${{ steps.cpr.outputs.pull-request-head-sha }}" >> ${GITHUB_STEP_SUMMARY} From e41113515dbf310024ec7487d22b95c5575627fc Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:32:42 +0200 Subject: [PATCH 07/22] find the environment files --- .github/workflows/refresh-lockfiles.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml index d5839d81288..64c64677c3d 100644 --- a/.github/workflows/refresh-lockfiles.yml +++ b/.github/workflows/refresh-lockfiles.yml @@ -26,7 +26,7 @@ jobs: steps: - uses: actions/checkout@v5 - id: get_py - run: echo "MATRIX=$(ls -1 requirements/py*.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} + run: echo "MATRIX=$(ls -1 ci/requirements/environment-*.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} gen_lockfiles: # This is a matrix job: it splits to create new lockfiles for each @@ -47,7 +47,7 @@ jobs: conda update -n base --all - name: generate lockfile run: | - pipx run conda-lock -k explicit -p linux-64 -f requirements/${{matrix.python}}.yml + pipx run conda-lock -k explicit -p linux-64 -f ci/requirements/${{matrix.python}}.yml mv conda-linux-64.lock ${{matrix.python}}-linux-64.lock - name: output lockfile uses: actions/upload-artifact@v4 @@ -68,7 +68,7 @@ jobs: - name: get artifacts uses: actions/download-artifact@v5 with: - path: ${{ github.workspace }}/requirements/locks + path: ${{ github.workspace }}/ci/requirements/locks merge-multiple: true - name: "Generate token" From fd0334577427a7ad31088c53576b4be36ae8b8a9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:36:13 +0200 Subject: [PATCH 08/22] Update 
refresh-lockfiles.yml --- .github/workflows/refresh-lockfiles.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml index 64c64677c3d..676d7b9cb1f 100644 --- a/.github/workflows/refresh-lockfiles.yml +++ b/.github/workflows/refresh-lockfiles.yml @@ -13,6 +13,9 @@ name: Refresh Lockfiles on: + pull_request: + branches: + - "*" workflow_call: jobs: From d1a8ff6c6f854004bb4eb150844042d91d2d683e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:40:01 +0200 Subject: [PATCH 09/22] only create for benchmarks, others errors cant find valid jaxlib version --- .github/workflows/refresh-lockfiles.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml index 676d7b9cb1f..7aa8803426a 100644 --- a/.github/workflows/refresh-lockfiles.yml +++ b/.github/workflows/refresh-lockfiles.yml @@ -29,7 +29,7 @@ jobs: steps: - uses: actions/checkout@v5 - id: get_py - run: echo "MATRIX=$(ls -1 ci/requirements/environment-*.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} + run: echo "MATRIX=$(ls -1 ci/requirements/environment-benchmark.yml | xargs -n1 basename | sed 's/....$//' | jq -cnR '[inputs]')" >> ${GITHUB_OUTPUT} gen_lockfiles: # This is a matrix job: it splits to create new lockfiles for each From f6b1294a47715b999763d8816f2fcdcc34bd559d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:44:44 +0200 Subject: [PATCH 10/22] Create environment-benchmark-linux-64.lock --- .../locks/environment-benchmark-linux-64.lock | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 ci/requirements/locks/environment-benchmark-linux-64.lock diff --git a/ci/requirements/locks/environment-benchmark-linux-64.lock b/ci/requirements/locks/environment-benchmark-linux-64.lock new file mode 100644 index 00000000000..c8c67b1bcf2 --- /dev/null +++ b/ci/requirements/locks/environment-benchmark-linux-64.lock @@ -0,0 +1,164 @@ +# Generated by conda-lock. 
+# platform: linux-64 +# input_hash: 98a4801aafacb13b98a04850d749d850a6ee57bf6f26506db814286373f2ebaf +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-headers-1.21.0-ha770c72_1.conda#9e298d76f543deb06eb0f3413675e13a +https://conda.anaconda.org/conda-forge/linux-64/nlohmann_json-3.12.0-h3f2d84a_0.conda#d76872d096d063e226482c99337209dc +https://conda.anaconda.org/conda-forge/noarch/nomkl-1.0-h5ca1d4c_0.tar.bz2#9a66894dfd07c4510beb6b3f9672ccc0 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-8_cp313.conda#94305520c52a4aa3f6c2b1ff6008d9f8 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.8.3-hbd8a1cb_0.conda#74784ee3d225fc3dca89edb635b4e5cc +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1423503_1.conda#0be7c6e070c19105f966d3758448d018 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.1.0-h767d61c_5.conda#dcd5ff1940cd38f6df777cac86819d60 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.1.0-h767d61c_5.conda#264fbfba7fb20acf3b29cde153e345ce +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.2-h39aace5_0.conda#791365c5f65975051e4e017b5da3abf5 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.12.4-hb03c661_0.conda#ae5621814cb99642c9308977fe90ed0d +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda#f7f0d6cc2dc986d42ac2689ec88192be +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda#b38117a3c920364aff79f870c984b4a3 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb03c661_4.conda#1d29d2e33fe59954af82ef54a8af3fe1 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.1-hecca717_0.conda#4211416ecba1866fab0c6470986c22d6 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.1.0-h69a702a_5.conda#069afdf8ea72504e48d23ae1171d951c +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.1.0-hcea5267_5.conda#fbd4008644add05032b6764807ee2cba +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda#915f5995e94f60e9a4826e0b0920ee88 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda#c7e925f37e3b40d893459e625f6a53f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.1.0-h8f9b012_5.conda#4e02a49aaa9d5190cb630fa43528fbe6 +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.11.0-hb04c3b8_0.conda#34fb73fd2d5a613d8f17ce2eaa15a8a5 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.1-he9a06e4_0.conda#af930c65e9a79a3423d6d36e265cef65 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.3-h26f9b46_0.conda#72b3dd72e4f0b88cdacf3421313480f0 +https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda#a77f85f77be52ff59391544bfe73390a +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.9.2-he7b75e1_1.conda#c04d1312e7feec369308d656c18e7f3e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.3.1-h92c474e_6.conda#3490e744cb8b9d5a3b9785839d618a17 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.2.4-h92c474e_1.conda#4ab554b102065910f098f88b40163835 +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.2.7-h92c474e_2.conda#248831703050fe9a5b2680a7589fdba9 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda#d411fc29e338efb48c5fd4576d71d881 +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20250512.1-cxx17_hba17884_0.conda#83b160d4da3e1e847bf044997621ed63 +https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.4-h3f801dc_0.conda#01ba04e414e47f95c03d6ddd81fd37be +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb03c661_4.conda#5cb5a1c9a94a78f5b23684bcb845338d +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb03c661_4.conda#2e55011fa483edb8bfe3fd92e860cd79 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.1.0-h69a702a_5.conda#0c91408b3dec0b97e8a3c694845bd63b +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.50.4-h0c1763c_0.conda#0b367fad34931cb79e0d6b7e5c06bb1c +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.1.0-h4852527_5.conda#8bba50c7f4679f08c861b597ad2bda6b +https://conda.anaconda.org/conda-forge/linux-64/libzip-1.11.2-h6991a6a_0.conda#a7b27c075c9b7f459f1c022090697cba +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.23-h8e187f5_0.conda#edd15d7a5914dc1d87617a2b7c582d23 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.2-h03e3b7b_0.conda#3d8da0248bdae970b4ade636a104b7f5 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_hd72426e_102.conda#a0116df4f4ed05c303811a837d5b39d8 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.21.2-h6252d9a_1.conda#cf5e9b21384fdb75b15faf397551c247 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda#2c2fae981fd2afd00812c92ac47d023d +https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda#ff862eebdfeb2fd048ae9dc92510baca 
+https://conda.anaconda.org/conda-forge/linux-64/hdf4-4.2.15-h2a13503_7.conda#bd77f8da987968ec3927990495dc22e4 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda#b499ce4b026493a13774bcf0f4c33849 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_2.conda#dfc5aae7b043d9f56ba99514d5e60625 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-6.31.1-h9ef548d_1.conda#b92e2a26764fcadb4304add7e698ccf2 +https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2025.08.12-h7b12aa8_1.conda#0a801dabf8776bb86b12091d2f99377e +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.22.0-h454ac66_1.conda#8ed82d90e6b1686f5e98f8b7825a15ef +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.7-h2b335a9_100_cp313.conda#724dcf9960e933838247971da07fe5cf +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.5.5-h149bd38_3.conda#f9bff8c2a205ee0f28b0c61dad849a98 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.10.4-h37a7233_0.conda#d828cb0be64d51e27eebe354a2907a98 +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py313h7033f15_4.conda#bc8624c405856b1d047dd0a81829b08c +https://conda.anaconda.org/conda-forge/noarch/certifi-2025.8.3-pyhd8ed1ab_0.conda#11f59985f49df4620890f3e746ed7102 +https://conda.anaconda.org/conda-forge/noarch/click-8.2.1-pyh707e725_0.conda#94b550b8d3a614dbd326af798c7dfb40 +https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.1-pyhd8ed1ab_0.conda#364ba6c9fb03886ac979b482f39ebb92 +https://conda.anaconda.org/conda-forge/linux-64/crc32c-2.7.1-py313h54dd161_2.conda#1b52ef3cbbb8a4108c78c7a73fe31450 +https://conda.anaconda.org/conda-forge/noarch/fsspec-2025.9.0-pyhd8ed1ab_0.conda#76f492bd8ba8a0fb80ffe16fc1a75b3b +https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e +https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-35_h4a7cf45_openblas.conda#6da7e852c812a84096b68158574398d0 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.14.1-h332b0f4_0.conda#45f6713cb00f124af300342512219182 +https://conda.anaconda.org/conda-forge/linux-64/libxml2-16-2.15.0-ha9997c6_0.conda#84bed2bfefc14e4878bd16979782e522 +https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.44.0-py313hfdae721_2.conda#dd0d7947635c0c524608eab7db55dcc9 +https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py313h8060acc_1.conda#21b62c55924f01b6eef6827167b46acb +https://conda.anaconda.org/conda-forge/linux-64/msgpack-python-1.1.1-py313h7037e92_1.conda#cc41d40a7ec345da56c496767d4bb61b +https://conda.anaconda.org/conda-forge/noarch/opt_einsum-3.4.0-pyhd8ed1ab_1.conda#52919815cd35c4e1a0298af658ccda04 +https://conda.anaconda.org/conda-forge/linux-64/orc-2.2.0-h1bc01a4_0.conda#53ab33c0b0ba995d2546e54b2160f3fd +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 
+https://conda.anaconda.org/conda-forge/linux-64/psutil-7.0.0-py313h07c4f96_1.conda#5a7c24c9dc49128731ae565cf598cde4 +https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda#12c566707c80111f9799308d9e265aef +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33 +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py313h8060acc_2.conda#50992ba61a8a1f8c2d346168ae1c86df +https://conda.anaconda.org/conda-forge/linux-64/re2-2025.08.12-h5301d42_1.conda#4637c13ff87424af0f6a981ab6f5ffa5 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda#3339e3b65d58accf4ca4fb8748ab16b3 +https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda#0401a17ae845fa72c7210e206ec5647d +https://conda.anaconda.org/conda-forge/noarch/tblib-3.1.0-pyhd8ed1ab_0.conda#a15c62b8a306b8978f094f76da2f903f +https://conda.anaconda.org/conda-forge/noarch/toolz-1.0.0-pyhd8ed1ab_1.conda#40d0ed782a8aaa16ef248e68c06c168d +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.2-py313h07c4f96_1.conda#45821154b9cb2fb63c2b354c76086954 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda#0caa1af407ecff61170c9437a808404d +https://conda.anaconda.org/conda-forge/linux-64/wrapt-1.17.3-py313h07c4f96_1.conda#c2662497e9a9ff2153753682f53989c9 +https://conda.anaconda.org/conda-forge/noarch/zict-3.0.0-pyhd8ed1ab_1.conda#e52c2ef711ccf31bb7f70ca87d144b9e +https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhd8ed1ab_0.conda#df5e78d904988eb55042c0c97446079f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.9.0-h0fbd49f_19.conda#24139f2990e92effbeb374a0eb33fdb1 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.13.3-h19deb91_3.conda#1680d64986f8263978c3624f677656c8 +https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.16.0-h3a458e0_1.conda#682cb082bbd998528c51f1e77d9ce415 +https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.1-py313hf01b4d8_1.conda#c4a0f01c46bc155d205694bec57bd709 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-1.0.1-py313h536fd9c_0.conda#e886bb6a3c24f8b9dd4fcd1d617a1f64 +https://conda.anaconda.org/conda-forge/noarch/deprecated-1.2.18-pyhd8ed1ab_0.conda#0cef44b1754ae4d6924ac0eef6b9fdbe +https://conda.anaconda.org/conda-forge/noarch/donfig-0.8.1.post1-pyhd8ed1ab_1.conda#c56a7fa5597ad78b62e1f5d21f7f8b8f +https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda#164fc43f0b53b6e3a7bc7dce5e4f1dc9 +https://conda.anaconda.org/conda-forge/linux-64/hdf5-1.14.6-nompi_h6e4c0c1_103.conda#c74d83614aec66227ae5199d98852aaf +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.7.0-pyhe01879c_1.conda#63ccfdc3a3ce25b027b8767eb722fca8 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-35_h0358290_openblas.conda#8aa3389d36791ecd31602a247b1f3641 +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.73.1-h1e535eb_0.conda#8075d8550f773a17288c7ec2cf2f2d56 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-35_h47877c9_openblas.conda#aa0b36b71d44f74686f13b9bfabec891 
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.15.0-h26afc86_0.conda#c52b54db4660b44ca75b6a61c533b9f5 +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda#0badf9c54e24cecfb0ad2f99d680c163 +https://conda.anaconda.org/conda-forge/linux-64/prometheus-cpp-1.3.0-ha5d0236_0.conda#a83f6a2fdc079e643237887a37460668 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda#5b8d21249ff20967101ffa321cab24e8 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.8.6-h800fcd2_2.conda#50e0900a33add0c715f17648de6be786 +https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.12.0-ha729027_0.conda#3dab8d6fa3d10fe4104f1fbe59c10176 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.10.0-h4bb41a7_3.conda#1efaf34774bfb92ecf2fa8fa985b2752 +https://conda.anaconda.org/conda-forge/noarch/dask-core-2025.9.1-pyhcf101f3_0.conda#c49de33395d775a92ea90e0cb34c3577 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.39.0-hdb79228_0.conda#a2e30ccd49f753fd30de0d30b1569789 +https://conda.anaconda.org/conda-forge/linux-64/libnetcdf-4.9.3-nompi_h11f7409_103.conda#3ccff1066c05a1e6c221356eecc40581 +https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-1.21.0-hb9b0907_1.conda#1c0320794855f457dea27d35c4c71e23 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py313h17eae1a_0.conda#7a2d2f9adecd86ed5c29c2115354f615 +https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.25.0-py313h54dd161_0.conda#1fe43bd1fc86e22ad3eb0edec637f8a2 +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.33.1-hb4fd278_2.conda#81c545e27e527ca1be0cc04b74c20386 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.14.0-hb1c9500_1.conda#30da390c211967189c58f83ab58a6f0c +https://conda.anaconda.org/conda-forge/linux-64/bottleneck-1.6.0-py313h29aa505_0.conda#02405ff909c10e59bf13527f8df3910c +https://conda.anaconda.org/conda-forge/linux-64/cftime-1.6.4-py313h29aa505_2.conda#1363e8db910e403edc8fd486f8470ec6 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.39.0-hdbdcf42_0.conda#bd21962ff8a9d1ce4720d42a35a4af40 +https://conda.anaconda.org/conda-forge/linux-64/numba-0.61.2-py313h50b8c88_1.conda#53c79b7cdee329ed4c77cafe27600cdb +https://conda.anaconda.org/conda-forge/linux-64/numcodecs-0.16.1-py313h08cd8bf_1.conda#5c1c296392a81820e2332b3315f58b66 +https://conda.anaconda.org/conda-forge/linux-64/numexpr-2.12.1-py313h24ae7f9_100.conda#8e5d3d84d8091537034c021420853613 +https://conda.anaconda.org/conda-forge/noarch/numpy_groupies-0.11.3-pyhd8ed1ab_0.conda#5402c2b046432ceb2d192a82802e7854 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.3.2-py313h08cd8bf_0.conda#5f4cc42e08d6d862b7b919a3c8959e0b +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.16.2-py313h11c21cd_0.conda#85a80978a04be9c290b8fe6d9bccff1c +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.5.0-pyhd8ed1ab_0.conda#436c165519e140cb08d246a4472a9d6a +https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.606-h31ade35_1.conda#e33b3d2a2d44ba0fb35373d2343b71dd +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.12.0-h8b27e44_3.conda#7b738aea4f1b8ae2d1118156ad3ae993 +https://conda.anaconda.org/conda-forge/noarch/distributed-2025.9.1-pyhcf101f3_0.conda#f140b63da44c9a3fc7ae75cb9cc53c47 +https://conda.anaconda.org/conda-forge/noarch/flox-0.10.6-pyhd8ed1ab_0.conda#40136da5d8e93ccbd406518154763fd9 
+https://conda.anaconda.org/conda-forge/linux-64/netcdf4-1.7.2-nompi_py313hfae5b86_104.conda#b6ddba788230a41a534cf288d41a1df4
+https://conda.anaconda.org/conda-forge/noarch/numbagg-0.9.2-pyhd8ed1ab_0.conda#5e01f678d82477576cb4d56cc6e9357f
+https://conda.anaconda.org/conda-forge/noarch/sparse-0.17.0-pyhcf101f3_0.conda#1b59de14a7e5888f939611e1fe329e00
+https://conda.anaconda.org/conda-forge/noarch/zarr-3.1.2-pyhcf101f3_0.conda#2bdb3950ea64a365bfe9e6414e748a9b
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-21.0.0-hb708d0b_3_cpu.conda#2d0305c8802fcba095d8d4e14e66ed3b
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-compute-21.0.0-h8c2c5c3_3_cpu.conda#b0b73752adfcbe6b73ef9f2eb5d5cf03
+https://conda.anaconda.org/conda-forge/linux-64/libparquet-21.0.0-h790f06f_3_cpu.conda#0568ba99a1f6c0ef7a04ca23dc78905a
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-21.0.0-h635bf11_3_cpu.conda#12fe67afbd946adae49856b275478d0f
+https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-21.0.0-py313he109ebe_0_cpu.conda#3018b7f30825c21c47a7a1e061459f96
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-21.0.0-h635bf11_3_cpu.conda#630dfffcaf67b800607164d4b5b08bf7
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-21.0.0-h3f74fd7_3_cpu.conda#595ca398ad8dcac76a315f358e3312a6
+https://conda.anaconda.org/conda-forge/linux-64/pyarrow-21.0.0-py313h78bf25f_0.conda#1580ddd94606ccb60270877cb8838562

From f33a0e3b93236681ffcdf65e5e2e08d511b33c2f Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 21:51:43 +0200
Subject: [PATCH 11/22] explicit lock file for now.

---
 noxfile.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index 415e4fc3d5f..00c7260ed3f 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -35,7 +35,8 @@ def session_lockfile(session: nox.sessions.Session) -> Path:
     """Return the path of the session lockfile."""
-    return Path(f"requirements/locks/py{session.python.replace('.', '')}-linux-64.lock")
+    # return Path(f"requirements/locks/py{session.python.replace('.', '')}-linux-64.lock")
+    return Path(f"requirements/locks/environment-benchmark-linux-64.lock")


 def session_cachefile(session: nox.sessions.Session) -> Path:

From cd1bb0f8c0f16824ef508b1e7b9d0c61625a80e3 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 21:54:25 +0200
Subject: [PATCH 12/22] locks in ci folder

---
 noxfile.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/noxfile.py b/noxfile.py
index 00c7260ed3f..dc3e5ca5910 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -35,8 +35,8 @@ def session_lockfile(session: nox.sessions.Session) -> Path:
     """Return the path of the session lockfile."""
-    # return Path(f"requirements/locks/py{session.python.replace('.', '')}-linux-64.lock")
-    return Path(f"requirements/locks/environment-benchmark-linux-64.lock")
+    # return Path(f"ci/requirements/locks/py{session.python.replace('.', '')}-linux-64.lock")
+    return Path(f"ci/requirements/locks/environment-benchmark-linux-64.lock")


 def session_cachefile(session: nox.sessions.Session) -> Path:

From 4089c79c834b1a7ff5f743852a18af5eed4cd593 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:01:16 +0200
Subject: [PATCH 13/22] no cartopy

---
 noxfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index dc3e5ca5910..3c8f92b9586 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -150,7 +150,7 @@ def prepare_venv(session: nox.sessions.Session) -> None:
         logger.debug(f"Environment {venv_dir} is up to date")

-        cache_cartopy(session)
+        # cache_cartopy(session)

         # Determine whether verbose diagnostics have been requested
         # from the command line.

From 7d5cac4ebea854c03bb245378d8a1f3a37d595fc Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:04:26 +0200
Subject: [PATCH 14/22] locks in ci folder

---
 benchmarks/bm_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py
index dc2e174f52c..c0da5b5fb47 100644
--- a/benchmarks/bm_runner.py
+++ b/benchmarks/bm_runner.py
@@ -647,7 +647,7 @@ def func(args: argparse.Namespace) -> None:
         # Find the most recent commit where the lock-files are not
         # identical to HEAD - will force environment updates.
-        locks_dir = Path(__file__).parents[1] / "requirements" / "locks"
+        locks_dir = Path(__file__).parents[1] / "ci" / "requirements" / "locks"
         assert locks_dir.is_dir()
         git_command = shlex.split(
             f"git log -1 --pretty=format:%P -- {locks_dir.resolve()}"

From 49a028317b98398db840199b6a45d628bc83d083 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:07:23 +0200
Subject: [PATCH 15/22] ci folder...

---
 benchmarks/asv_delegated.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py
index bbb4b03ea56..8b25b8840a1 100644
--- a/benchmarks/asv_delegated.py
+++ b/benchmarks/asv_delegated.py
@@ -91,7 +91,7 @@ class Mode(enum.Enum):
                 # Just NOX for now but the architecture is here for future cases.
                 case Mode.NOX:
                     # Need to determine a single Python version to run with.
-                    req_dir = build_dir / "requirements"
+                    req_dir = build_dir / "ci" / "requirements"
                     lockfile_dir = req_dir / "locks"
                     if not lockfile_dir.is_dir():
                         lockfile_dir = req_dir / "ci" / "nox.lock"

From f25923b4b8715038f2f6a396db585bc8da94c5ba Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:22:23 +0200
Subject: [PATCH 16/22] Update asv_delegated.py

---
 benchmarks/asv_delegated.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py
index 8b25b8840a1..45aae88b6ca 100644
--- a/benchmarks/asv_delegated.py
+++ b/benchmarks/asv_delegated.py
@@ -97,12 +97,12 @@ class Mode(enum.Enum):
                         lockfile_dir = req_dir / "ci" / "nox.lock"

                     if not lockfile_dir.is_dir():
-                        message = "No lockfile directory found in the expected locations."
+                        message = f"No lockfile directory found in the expected locations, got {lockfile_dir}."
                         raise FileNotFoundError(message)

                     def py_ver_from_lockfiles(lockfile: Path) -> str:
                         pattern = re.compile(r"py(\d+)-")
-                        search = pattern.search(lockfile.name)
+                        search = pattern.search(lockfile.name
                         assert search is not None
                         version = search.group(1)
                         return f"{version[0]}.{version[1:]}"

From ed6fa58e4757d081793f0c4a0869feec6437319e Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:28:14 +0200
Subject: [PATCH 17/22] static 3.13 for now

---
 benchmarks/asv_delegated.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py
index 45aae88b6ca..232bf7ab98b 100644
--- a/benchmarks/asv_delegated.py
+++ b/benchmarks/asv_delegated.py
@@ -102,16 +102,17 @@ class Mode(enum.Enum):
                     def py_ver_from_lockfiles(lockfile: Path) -> str:
                         pattern = re.compile(r"py(\d+)-")
-                        search = pattern.search(lockfile.name
+                        search = pattern.search(lockfile.name)
                         assert search is not None
                         version = search.group(1)
                         return f"{version[0]}.{version[1:]}"

-                    python_versions = [
-                        py_ver_from_lockfiles(lockfile)
-                        for lockfile in lockfile_dir.glob("*.lock")
-                    ]
-                    python_version = max(python_versions)
+                    # python_versions = [
+                    #     py_ver_from_lockfiles(lockfile)
+                    #     for lockfile in lockfile_dir.glob("*.lock")
+                    # ]
+                    # python_version = max(python_versions)
+                    python_version = "3.13"

                     # Construct and run the environment preparation command.
                     local_envs = dict(environ)

From 5cde8d2823e010b0405a5a539cdd449b2e19901e Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:40:10 +0200
Subject: [PATCH 18/22] not needed when hardcoding py version

---
 benchmarks/asv_delegated.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/benchmarks/asv_delegated.py b/benchmarks/asv_delegated.py
index 232bf7ab98b..85ba432ac75 100644
--- a/benchmarks/asv_delegated.py
+++ b/benchmarks/asv_delegated.py
@@ -91,21 +91,21 @@ class Mode(enum.Enum):
                 # Just NOX for now but the architecture is here for future cases.
                 case Mode.NOX:
                     # Need to determine a single Python version to run with.
-                    req_dir = build_dir / "ci" / "requirements"
-                    lockfile_dir = req_dir / "locks"
-                    if not lockfile_dir.is_dir():
-                        lockfile_dir = req_dir / "ci" / "nox.lock"
-
-                    if not lockfile_dir.is_dir():
-                        message = f"No lockfile directory found in the expected locations, got {lockfile_dir}."
-                        raise FileNotFoundError(message)
-
-                    def py_ver_from_lockfiles(lockfile: Path) -> str:
-                        pattern = re.compile(r"py(\d+)-")
-                        search = pattern.search(lockfile.name)
-                        assert search is not None
-                        version = search.group(1)
-                        return f"{version[0]}.{version[1:]}"
+                    # req_dir = build_dir / "ci" / "requirements"
+                    # lockfile_dir = req_dir / "locks"
+                    # if not lockfile_dir.is_dir():
+                    #     lockfile_dir = req_dir / "ci" / "nox.lock"
+
+                    # if not lockfile_dir.is_dir():
+                    #     message = f"No lockfile directory found in the expected locations, got '{lockfile_dir}'."
+                    #     raise FileNotFoundError(message)
+
+                    # def py_ver_from_lockfiles(lockfile: Path) -> str:
+                    #     pattern = re.compile(r"py(\d+)-")
+                    #     search = pattern.search(lockfile.name)
+                    #     assert search is not None
+                    #     version = search.group(1)
+                    #     return f"{version[0]}.{version[1:]}"
                     # python_versions = [
                     #     py_ver_from_lockfiles(lockfile)

From 928ad20779943216ffb9928055cbc8e64547a62c Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 22:52:18 +0200
Subject: [PATCH 19/22] Update noxfile.py

---
 noxfile.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/noxfile.py b/noxfile.py
index 3c8f92b9586..def44ff487e 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -129,6 +129,7 @@ def prepare_venv(session: nox.sessions.Session) -> None:
     """
     lockfile = session_lockfile(session)
+    print(f"prepare_venv: {lockfile}")
    venv_dir = session.virtualenv.location_name

    if not venv_populated(session):

From 4fd354aa95c9a41040575de71b692c244a3dbb68 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 23:06:48 +0200
Subject: [PATCH 20/22] remove iris tests

---
 benchmarks/benchmarks_iris/__init__.py        |  76 -----
 .../benchmarks_iris/aggregate_collapse.py     | 212 -------------
 benchmarks/benchmarks_iris/cperf/__init__.py  |  92 ------
 benchmarks/benchmarks_iris/cperf/equality.py  |  55 ----
 benchmarks/benchmarks_iris/cperf/load.py      |  55 ----
 benchmarks/benchmarks_iris/cperf/save.py      |  40 ---
 benchmarks/benchmarks_iris/cube.py            | 116 --------
 .../benchmarks_iris/generate_data/__init__.py | 135 ---------
 .../benchmarks_iris/generate_data/stock.py    | 184 ------------
 .../benchmarks_iris/generate_data/ugrid.py    | 190 ------------
 .../benchmarks_iris/generate_data/um_files.py | 198 -------------
 benchmarks/benchmarks_iris/import_iris.py     | 278 ------------------
 benchmarks/benchmarks_iris/iterate.py         |  26 --
 benchmarks/benchmarks_iris/load/__init__.py   | 221 --------------
 benchmarks/benchmarks_iris/load/ugrid.py      | 115 --------
 benchmarks/benchmarks_iris/merge_concat.py    |  72 -----
 benchmarks/benchmarks_iris/mesh/__init__.py   |   5 -
 .../benchmarks_iris/mesh/utils/__init__.py    |   5 -
 .../mesh/utils/regions_combine.py             | 227 --------------
 benchmarks/benchmarks_iris/plot.py            |  34 ---
 benchmarks/benchmarks_iris/regridding.py      | 119 --------
 benchmarks/benchmarks_iris/save.py            |  43 ---
 benchmarks/benchmarks_iris/sperf/__init__.py  |  38 ---
 .../benchmarks_iris/sperf/combine_regions.py  | 234 ---------------
 benchmarks/benchmarks_iris/sperf/equality.py  |  35 ---
 benchmarks/benchmarks_iris/sperf/load.py      |  27 --
 benchmarks/benchmarks_iris/sperf/save.py      |  50 ----
 benchmarks/benchmarks_iris/stats.py           |  52 ----
 benchmarks/benchmarks_iris/trajectory.py      |  56 ----
 .../unit_style/__init__disabled.py            |  16 -
 .../benchmarks_iris/unit_style/aux_factory.py |  52 ----
 .../benchmarks_iris/unit_style/coords.py      | 129 --------
 benchmarks/benchmarks_iris/unit_style/cube.py | 252 ----------------
 benchmarks/benchmarks_iris/unit_style/mesh.py | 187 -----------
 .../unit_style/metadata_manager_factory.py    |  83 ------
 .../benchmarks_iris/unit_style/mixin.py       |  78 -----
 36 files changed, 3787 deletions(-)
 delete mode 100644 benchmarks/benchmarks_iris/__init__.py
 delete mode 100644 benchmarks/benchmarks_iris/aggregate_collapse.py
 delete mode 100644 benchmarks/benchmarks_iris/cperf/__init__.py
 delete mode 100644 benchmarks/benchmarks_iris/cperf/equality.py
 delete mode 100644 benchmarks/benchmarks_iris/cperf/load.py
 delete mode 100644 benchmarks/benchmarks_iris/cperf/save.py
 delete mode
100644 benchmarks/benchmarks_iris/cube.py delete mode 100644 benchmarks/benchmarks_iris/generate_data/__init__.py delete mode 100644 benchmarks/benchmarks_iris/generate_data/stock.py delete mode 100644 benchmarks/benchmarks_iris/generate_data/ugrid.py delete mode 100644 benchmarks/benchmarks_iris/generate_data/um_files.py delete mode 100644 benchmarks/benchmarks_iris/import_iris.py delete mode 100644 benchmarks/benchmarks_iris/iterate.py delete mode 100644 benchmarks/benchmarks_iris/load/__init__.py delete mode 100644 benchmarks/benchmarks_iris/load/ugrid.py delete mode 100644 benchmarks/benchmarks_iris/merge_concat.py delete mode 100644 benchmarks/benchmarks_iris/mesh/__init__.py delete mode 100644 benchmarks/benchmarks_iris/mesh/utils/__init__.py delete mode 100644 benchmarks/benchmarks_iris/mesh/utils/regions_combine.py delete mode 100644 benchmarks/benchmarks_iris/plot.py delete mode 100644 benchmarks/benchmarks_iris/regridding.py delete mode 100644 benchmarks/benchmarks_iris/save.py delete mode 100644 benchmarks/benchmarks_iris/sperf/__init__.py delete mode 100644 benchmarks/benchmarks_iris/sperf/combine_regions.py delete mode 100644 benchmarks/benchmarks_iris/sperf/equality.py delete mode 100644 benchmarks/benchmarks_iris/sperf/load.py delete mode 100644 benchmarks/benchmarks_iris/sperf/save.py delete mode 100644 benchmarks/benchmarks_iris/stats.py delete mode 100644 benchmarks/benchmarks_iris/trajectory.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/__init__disabled.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/aux_factory.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/coords.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/cube.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/mesh.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py delete mode 100644 benchmarks/benchmarks_iris/unit_style/mixin.py diff --git a/benchmarks/benchmarks_iris/__init__.py b/benchmarks/benchmarks_iris/__init__.py deleted file mode 100644 index e41fe6388d9..00000000000 --- a/benchmarks/benchmarks_iris/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Common code for benchmarks.""" - -from os import environ - -import iris - -from . import generate_data -from .generate_data.um_files import create_um_files - - -def disable_repeat_between_setup(benchmark_object): - """Benchmark where object persistence would be inappropriate (decorator). - - E.g: - - * Benchmarking data realisation - * Benchmarking Cube coord addition - - Can be applied to benchmark classes/methods/functions. - - https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks - - """ - # Prevent repeat runs between setup() runs - object(s) will persist after 1st. - benchmark_object.number = 1 - # Compensate for reduced certainty by increasing number of repeats. - # (setup() is run between each repeat). - # Minimum 5 repeats, run up to 30 repeats / 20 secs whichever comes first. - benchmark_object.repeat = (5, 30, 20.0) - # ASV uses warmup to estimate benchmark time before planning the real run. - # Prevent this, since object(s) will persist after first warmup run, - # which would give ASV misleading info (warmups ignore ``number``). 
- benchmark_object.warmup_time = 0.0 - - return benchmark_object - - -def on_demand_benchmark(benchmark_object): - """Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set. - - This is a decorator. - - For benchmarks that, for whatever reason, should not be run by default. - E.g: - - * Require a local file - * Used for scalability analysis instead of commit monitoring. - - Can be applied to benchmark classes/methods/functions. - - """ - if "ON_DEMAND_BENCHMARKS" in environ: - return benchmark_object - - -@on_demand_benchmark -class ValidateSetup: - """Simple benchmarks that exercise all elements of our setup.""" - - params = [1, 2] - - def setup(self, param): - generate_data.REUSE_DATA = False - (self.file_path,) = create_um_files( - param, param, param, param, False, ["NetCDF"] - ).values() - - def time_validate(self, param): - _ = iris.load(self.file_path) - - def tracemalloc_validate(self, param): - _ = iris.load(self.file_path) diff --git a/benchmarks/benchmarks_iris/aggregate_collapse.py b/benchmarks/benchmarks_iris/aggregate_collapse.py deleted file mode 100644 index 4d5d2923bc1..00000000000 --- a/benchmarks/benchmarks_iris/aggregate_collapse.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``.""" - -import warnings - -import numpy as np - -from iris import analysis, coords, cube -from iris.warnings import IrisVagueMetadataWarning - -from .generate_data.stock import realistic_4d_w_everything - - -class AggregationMixin: - params = [[False, True]] - param_names = ["Lazy operations"] - - def setup(self, lazy_run: bool): - warnings.filterwarnings("ignore", message="Ignoring a datum") - warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning) - cube = realistic_4d_w_everything(lazy=lazy_run) - - for cm in cube.cell_measures(): - cube.remove_cell_measure(cm) - for av in cube.ancillary_variables(): - cube.remove_ancillary_variable(av) - - agg_mln_data = np.arange(0, 70, 10) - agg_mln_repeat = np.repeat(agg_mln_data, 10) - - cube = cube[..., :10, :10] - - self.mln_aux = "aggregatable" - self.mln = "model_level_number" - agg_mln_coord = coords.AuxCoord(points=agg_mln_repeat, long_name=self.mln_aux) - - if lazy_run: - agg_mln_coord.points = agg_mln_coord.lazy_points() - cube.add_aux_coord(agg_mln_coord, 1) - self.cube = cube - - -class Aggregation(AggregationMixin): - def time_aggregated_by_MEAN(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.MEAN).data - - def time_aggregated_by_COUNT(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, analysis.COUNT, function=lambda values: values > 280 - ).data - - def time_aggregated_by_GMEAN(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.GMEAN).data - - def time_aggregated_by_HMEAN(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.HMEAN).data - - def time_aggregated_by_MAX_RUN(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, analysis.MAX_RUN, function=lambda values: values > 280 - ).data - - def time_aggregated_by_MAX(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.MAX).data - - def time_aggregated_by_MEDIAN(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.MEDIAN).data - - def time_aggregated_by_MIN(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.MIN).data - - def time_aggregated_by_PEAK(self, 
_): - _ = self.cube.aggregated_by(self.mln_aux, analysis.PEAK).data - - def time_aggregated_by_PERCENTILE(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, analysis.PERCENTILE, percent=[10, 50, 90] - ).data - - def time_aggregated_by_FAST_PERCENTILE(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, - analysis.PERCENTILE, - mdtol=0, - percent=[10, 50, 90], - fast_percentile_method=True, - ).data - - def time_aggregated_by_PROPORTION(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, - analysis.PROPORTION, - function=lambda values: values > 280, - ).data - - def time_aggregated_by_STD_DEV(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.STD_DEV).data - - def time_aggregated_by_VARIANCE(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.VARIANCE).data - - def time_aggregated_by_RMS(self, _): - _ = self.cube.aggregated_by(self.mln_aux, analysis.RMS).data - - def time_collapsed_by_MEAN(self, _): - _ = self.cube.collapsed(self.mln, analysis.MEAN).data - - def time_collapsed_by_COUNT(self, _): - _ = self.cube.collapsed( - self.mln, analysis.COUNT, function=lambda values: values > 280 - ).data - - def time_collapsed_by_GMEAN(self, _): - _ = self.cube.collapsed(self.mln, analysis.GMEAN).data - - def time_collapsed_by_HMEAN(self, _): - _ = self.cube.collapsed(self.mln, analysis.HMEAN).data - - def time_collapsed_by_MAX_RUN(self, _): - _ = self.cube.collapsed( - self.mln, analysis.MAX_RUN, function=lambda values: values > 280 - ).data - - def time_collapsed_by_MAX(self, _): - _ = self.cube.collapsed(self.mln, analysis.MAX).data - - def time_collapsed_by_MEDIAN(self, _): - _ = self.cube.collapsed(self.mln, analysis.MEDIAN).data - - def time_collapsed_by_MIN(self, _): - _ = self.cube.collapsed(self.mln, analysis.MIN).data - - def time_collapsed_by_PEAK(self, _): - _ = self.cube.collapsed(self.mln, analysis.PEAK).data - - def time_collapsed_by_PERCENTILE(self, _): - _ = self.cube.collapsed( - self.mln, analysis.PERCENTILE, percent=[10, 50, 90] - ).data - - def time_collapsed_by_FAST_PERCENTILE(self, _): - _ = self.cube.collapsed( - self.mln, - analysis.PERCENTILE, - mdtol=0, - percent=[10, 50, 90], - fast_percentile_method=True, - ).data - - def time_collapsed_by_PROPORTION(self, _): - _ = self.cube.collapsed( - self.mln, analysis.PROPORTION, function=lambda values: values > 280 - ).data - - def time_collapsed_by_STD_DEV(self, _): - _ = self.cube.collapsed(self.mln, analysis.STD_DEV).data - - def time_collapsed_by_VARIANCE(self, _): - _ = self.cube.collapsed(self.mln, analysis.VARIANCE).data - - def time_collapsed_by_RMS(self, _): - _ = self.cube.collapsed(self.mln, analysis.RMS).data - - -class WeightedAggregation(AggregationMixin): - def setup(self, lazy_run): - super().setup(lazy_run) - - weights = np.linspace(0, 1, 70) - weights = np.broadcast_to(weights, self.cube.shape[:2]) - weights = np.broadcast_to(weights.T, self.cube.shape[::-1]) - weights = weights.T - - self.weights = weights - - ## currently has problems with indexing weights - # def time_w_aggregated_by_WPERCENTILE(self, _): - # _ = self.cube.aggregated_by( - # self.mln_aux, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90] - # ).data - - def time_w_aggregated_by_SUM(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, analysis.SUM, weights=self.weights - ).data - - def time_w_aggregated_by_RMS(self, _): - _ = self.cube.aggregated_by( - self.mln_aux, analysis.RMS, weights=self.weights - ).data - - def time_w_aggregated_by_MEAN(self, _): - _ = self.cube.aggregated_by( - 
self.mln_aux, analysis.MEAN, weights=self.weights - ).data - - def time_w_collapsed_by_WPERCENTILE(self, _): - _ = self.cube.collapsed( - self.mln, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90] - ).data - - def time_w_collapsed_by_SUM(self, _): - _ = self.cube.collapsed(self.mln, analysis.SUM, weights=self.weights).data - - def time_w_collapsed_by_RMS(self, _): - _ = self.cube.collapsed(self.mln, analysis.RMS, weights=self.weights).data - - def time_w_collapsed_by_MEAN(self, _): - _ = self.cube.collapsed(self.mln, analysis.MEAN, weights=self.weights).data diff --git a/benchmarks/benchmarks_iris/cperf/__init__.py b/benchmarks/benchmarks_iris/cperf/__init__.py deleted file mode 100644 index 05a086bc44b..00000000000 --- a/benchmarks/benchmarks_iris/cperf/__init__.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project. - -CPerf = comparing performance working with data in UM versus LFRic formats. - -Files available from the UK Met Office: - moo ls moose:/adhoc/projects/avd/asv/data_for_nightly_tests/ -""" - -import numpy as np - -from iris import load_cube - -from ..generate_data import BENCHMARK_DATA -from ..generate_data.ugrid import make_cubesphere_testfile - -# The data of the core test UM files has dtype=np.float32 shape=(1920, 2560) -_UM_DIMS_YX = (1920, 2560) -# The closest cubesphere size in terms of datapoints is sqrt(1920*2560 / 6) -# This gives ~= 905, i.e. "C905" -_N_CUBESPHERE_UM_EQUIVALENT = int(np.sqrt(np.prod(_UM_DIMS_YX) / 6)) - - -class SingleDiagnosticMixin: - """For use in any benchmark classes that work on a single diagnostic file.""" - - params = [ - ["LFRic", "UM", "UM_lbpack0", "UM_netcdf"], - [False, True], - [False, True], - ] - param_names = ["file type", "height dim (len 71)", "time dim (len 3)"] - - def setup(self, file_type, three_d, three_times): - if file_type == "LFRic": - # Generate an appropriate synthetic LFRic file. - if three_times: - n_times = 3 - else: - n_times = 1 - - # Use a cubesphere size ~equivalent to our UM test data. - cells_per_panel_edge = _N_CUBESPHERE_UM_EQUIVALENT - create_kwargs = dict(c_size=cells_per_panel_edge, n_times=n_times) - - if three_d: - create_kwargs["n_levels"] = 71 - - # Will reuse a file if already present. - file_path = make_cubesphere_testfile(**create_kwargs) - - else: - # Locate the appropriate UM file. 
- if three_times: - # pa/pb003 files - numeric = "003" - else: - # pa/pb000 files - numeric = "000" - - if three_d: - # theta diagnostic, N1280 file w/ 71 levels (1920, 2560, 71) - file_name = f"umglaa_pb{numeric}-theta" - else: - # surface_temp diagnostic, N1280 file (1920, 2560) - file_name = f"umglaa_pa{numeric}-surfacetemp" - - file_suffices = { - "UM": "", # packed FF (WGDOS lbpack = 1) - "UM_lbpack0": ".uncompressed", # unpacked FF (lbpack = 0) - "UM_netcdf": ".nc", # UM file -> Iris -> NetCDF file - } - suffix = file_suffices[file_type] - - file_path = (BENCHMARK_DATA / file_name).with_suffix(suffix) - if not file_path.exists(): - message = "\n".join( - [ - f"Expected local file not found: {file_path}", - "Available from the UK Met Office.", - ] - ) - raise FileNotFoundError(message) - - self.file_path = file_path - self.file_type = file_type - - def load(self): - return load_cube(str(self.file_path)) diff --git a/benchmarks/benchmarks_iris/cperf/equality.py b/benchmarks/benchmarks_iris/cperf/equality.py deleted file mode 100644 index ffe61ef9387..00000000000 --- a/benchmarks/benchmarks_iris/cperf/equality.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Equality benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" - -from .. import on_demand_benchmark -from . import SingleDiagnosticMixin - - -class EqualityMixin(SingleDiagnosticMixin): - r"""Use :class:`SingleDiagnosticMixin` as the realistic case. - - Uses :class:`SingleDiagnosticMixin` as the realistic case will be comparing - :class:`~iris.cube.Cube`\\ s that have been loaded from file. - - """ - - # Cut down the parent parameters. - params = [["LFRic", "UM"]] - - def setup(self, file_type, three_d=False, three_times=False): - super().setup(file_type, three_d, three_times) - self.cube = self.load() - self.other_cube = self.load() - - -@on_demand_benchmark -class CubeEquality(EqualityMixin): - r"""Benchmark time & memory costs of comparing LFRic & UM :class:`~iris.cube.Cube`\\ s.""" - - def _comparison(self): - _ = self.cube == self.other_cube - - def peakmem_eq(self, file_type): - self._comparison() - - def time_eq(self, file_type): - self._comparison() - - -@on_demand_benchmark -class MeshEquality(EqualityMixin): - """Provides extra context for :class:`CubeEquality`.""" - - params = [["LFRic"]] - - def _comparison(self): - _ = self.cube.mesh == self.other_cube.mesh - - def peakmem_eq(self, file_type): - self._comparison() - - def time_eq(self, file_type): - self._comparison() diff --git a/benchmarks/benchmarks_iris/cperf/load.py b/benchmarks/benchmarks_iris/cperf/load.py deleted file mode 100644 index 07c2de9e79f..00000000000 --- a/benchmarks/benchmarks_iris/cperf/load.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""File loading benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" - -from .. import on_demand_benchmark -from . import SingleDiagnosticMixin - - -@on_demand_benchmark -class SingleDiagnosticLoad(SingleDiagnosticMixin): - def time_load(self, _, __, ___): - """Perform a 'real world comparison'. - - * UM coords are always realised (DimCoords). - * LFRic coords are not realised by default (MeshCoords). 
- - """ - cube = self.load() - assert cube.has_lazy_data() - # UM files load lon/lat as DimCoords, which are always realised. - expecting_lazy_coords = self.file_type == "LFRic" - for coord_name in "longitude", "latitude": - coord = cube.coord(coord_name) - assert coord.has_lazy_points() == expecting_lazy_coords - assert coord.has_lazy_bounds() == expecting_lazy_coords - - def time_load_w_realised_coords(self, _, __, ___): - """Valuable extra comparison where both UM and LFRic coords are realised.""" - cube = self.load() - for coord_name in "longitude", "latitude": - coord = cube.coord(coord_name) - # Don't touch actual points/bounds objects - permanent - # realisation plays badly with ASV's re-run strategy. - if coord.has_lazy_points(): - coord.core_points().compute() - if coord.has_lazy_bounds(): - coord.core_bounds().compute() - - -@on_demand_benchmark -class SingleDiagnosticRealise(SingleDiagnosticMixin): - # The larger files take a long time to realise. - timeout = 600.0 - - def setup(self, file_type, three_d, three_times): - super().setup(file_type, three_d, three_times) - self.loaded_cube = self.load() - - def time_realise(self, _, __, ___): - # Don't touch loaded_cube.data - permanent realisation plays badly with - # ASV's re-run strategy. - assert self.loaded_cube.has_lazy_data() - self.loaded_cube.core_data().compute() diff --git a/benchmarks/benchmarks_iris/cperf/save.py b/benchmarks/benchmarks_iris/cperf/save.py deleted file mode 100644 index 6dcd0b3bcf9..00000000000 --- a/benchmarks/benchmarks_iris/cperf/save.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""File saving benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.""" - -from iris import save - -from .. import on_demand_benchmark -from ..generate_data.ugrid import make_cube_like_2d_cubesphere, make_cube_like_umfield -from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX - - -@on_demand_benchmark -class NetcdfSave: - """Benchmark time and memory costs of saving ~large-ish data cubes to netcdf. - - Parametrised by file type. - - """ - - params = ["LFRic", "UM"] - param_names = ["data type"] - - def setup(self, data_type): - if data_type == "LFRic": - self.cube = make_cube_like_2d_cubesphere( - n_cube=_N_CUBESPHERE_UM_EQUIVALENT, with_mesh=True - ) - else: - self.cube = make_cube_like_umfield(_UM_DIMS_YX) - - def _save_data(self, cube): - save(cube, "tmp.nc") - - def time_save_data_netcdf(self, data_type): - self._save_data(self.cube) - - def tracemalloc_save_data_netcdf(self, data_type): - self._save_data(self.cube) diff --git a/benchmarks/benchmarks_iris/cube.py b/benchmarks/benchmarks_iris/cube.py deleted file mode 100644 index 0b6829ee2d3..00000000000 --- a/benchmarks/benchmarks_iris/cube.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Cube benchmark tests.""" - -from collections.abc import Iterable - -from iris import coords -from iris.cube import Cube - -from .generate_data.stock import realistic_4d_w_everything - - -class CubeCreation: - params = [[False, True], ["instantiate", "construct"]] - param_names = ["Cube has mesh", "Cube creation strategy"] - - cube_kwargs: dict - - def setup(self, w_mesh: bool, _) -> None: - # Loaded as two cubes due to the hybrid height. 
- source_cube = realistic_4d_w_everything(w_mesh=w_mesh) - - def get_coords_and_dims( - coords_iter: Iterable[coords._DimensionalMetadata], - ) -> list[tuple[coords._DimensionalMetadata, tuple[int, ...]]]: - return [(c, c.cube_dims(source_cube)) for c in coords_iter] - - self.cube_kwargs = dict( - data=source_cube.data, - standard_name=source_cube.standard_name, - long_name=source_cube.long_name, - var_name=source_cube.var_name, - units=source_cube.units, - attributes=source_cube.attributes, - cell_methods=source_cube.cell_methods, - dim_coords_and_dims=get_coords_and_dims(source_cube.dim_coords), - aux_coords_and_dims=get_coords_and_dims(source_cube.aux_coords), - aux_factories=source_cube.aux_factories, - cell_measures_and_dims=get_coords_and_dims(source_cube.cell_measures()), - ancillary_variables_and_dims=get_coords_and_dims( - source_cube.ancillary_variables() - ), - ) - - def time_create(self, _, cube_creation_strategy: str) -> None: - if cube_creation_strategy == "instantiate": - _ = Cube(**self.cube_kwargs) - - elif cube_creation_strategy == "construct": - new_cube = Cube(data=self.cube_kwargs["data"]) - new_cube.standard_name = self.cube_kwargs["standard_name"] - new_cube.long_name = self.cube_kwargs["long_name"] - new_cube.var_name = self.cube_kwargs["var_name"] - new_cube.units = self.cube_kwargs["units"] - new_cube.attributes = self.cube_kwargs["attributes"] - new_cube.cell_methods = self.cube_kwargs["cell_methods"] - for coord, dims in self.cube_kwargs["dim_coords_and_dims"]: - assert isinstance(coord, coords.DimCoord) # Type hint to help linters. - new_cube.add_dim_coord(coord, dims) - for coord, dims in self.cube_kwargs["aux_coords_and_dims"]: - new_cube.add_aux_coord(coord, dims) - for aux_factory in self.cube_kwargs["aux_factories"]: - new_cube.add_aux_factory(aux_factory) - for cell_measure, dims in self.cube_kwargs["cell_measures_and_dims"]: - new_cube.add_cell_measure(cell_measure, dims) - for ancillary_variable, dims in self.cube_kwargs[ - "ancillary_variables_and_dims" - ]: - new_cube.add_ancillary_variable(ancillary_variable, dims) - - else: - message = f"Unknown cube creation strategy: {cube_creation_strategy}" - raise NotImplementedError(message) - - -class CubeEquality: - params = [ - [False, True], - [False, True], - ["metadata_inequality", "coord_inequality", "data_inequality", "all_equal"], - ] - param_names = ["Cubes are lazy", "Cubes have meshes", "Scenario"] - - cube_1: Cube - cube_2: Cube - coord_name = "surface_altitude" - - def setup(self, lazy: bool, w_mesh: bool, scenario: str) -> None: - self.cube_1 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy) - # Using Cube.copy() produces different results due to sharing of the - # Mesh instance. - self.cube_2 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy) - - match scenario: - case "metadata_inequality": - self.cube_2.long_name = "different" - case "coord_inequality": - coord = self.cube_2.coord(self.coord_name) - coord.points = coord.core_points() * 2 - case "data_inequality": - self.cube_2.data = self.cube_2.core_data() * 2 - case "all_equal": - pass - case _: - message = f"Unknown scenario: {scenario}" - raise NotImplementedError(message) - - def time_equality(self, lazy: bool, __, ___) -> None: - _ = self.cube_1 == self.cube_2 - if lazy: - for cube in (self.cube_1, self.cube_2): - # Confirm that this benchmark is safe for repetition. 
- assert cube.coord(self.coord_name).has_lazy_points() - assert cube.has_lazy_data() diff --git a/benchmarks/benchmarks_iris/generate_data/__init__.py b/benchmarks/benchmarks_iris/generate_data/__init__.py deleted file mode 100644 index 9a3671389b1..00000000000 --- a/benchmarks/benchmarks_iris/generate_data/__init__.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Scripts for generating supporting data for benchmarking. - -Data generated using this repo should use :func:`run_function_elsewhere`, which -means that data is generated using a fixed version of this repo and a fixed -environment, rather than those that get changed when the benchmarking run -checks out a new commit. - -Downstream use of data generated 'elsewhere' requires saving; usually in a -NetCDF file. Could also use pickling but there is a potential risk if the -benchmark sequence runs over two different Python versions. - -""" - -from contextlib import contextmanager -from inspect import getsource -from os import environ -from pathlib import Path -from subprocess import CalledProcessError, check_output, run -from textwrap import dedent -from warnings import warn - -from iris._lazy_data import as_concrete_data -from iris.fileformats import netcdf - -#: Python executable used by :func:`run_function_elsewhere`, set via env -#: variable of same name. Must be path of Python within an environment that -#: includes this repo (including dependencies and test modules) and Mule. -try: - DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"] - _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"]) -except KeyError: - error = "Env variable DATA_GEN_PYTHON not defined." - raise KeyError(error) -except (CalledProcessError, FileNotFoundError, PermissionError): - error = "Env variable DATA_GEN_PYTHON not a runnable python executable path." - raise ValueError(error) - -# The default location of data files used in benchmarks. Used by CI. -default_data_dir = (Path(__file__).parents[2] / ".data").resolve() -# Optionally override the default data location with environment variable. -BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir)) -if BENCHMARK_DATA == default_data_dir: - BENCHMARK_DATA.mkdir(exist_ok=True) - message = ( - f"No BENCHMARK_DATA env var, defaulting to {BENCHMARK_DATA}. " - "Note that some benchmark files are GB in size." - ) - warn(message) -elif not BENCHMARK_DATA.is_dir(): - message = f"Not a directory: {BENCHMARK_DATA} ." - raise ValueError(message) - -# Manual flag to allow the rebuilding of synthetic data. -# False forces a benchmark run to re-make all the data files. -REUSE_DATA = True - - -class DataGenerationError(Exception): - """Exception raised for errors during data generation.""" - - pass - - -def run_function_elsewhere(func_to_run, *args, **kwargs): - """Run a given function using the :const:`DATA_GEN_PYTHON` executable. - - This structure allows the function to be written natively. - - Parameters - ---------- - func_to_run : FunctionType - The function object to be run. - NOTE: the function must be completely self-contained, i.e. perform all - its own imports (within the target :const:`DATA_GEN_PYTHON` - environment). - *args : tuple, optional - Function call arguments. Must all be expressible as simple literals, - i.e. the ``repr`` must be a valid literal expression. - **kwargs: dict, optional - Function call keyword arguments. 
All values must be expressible as - simple literals (see ``*args``). - - Returns - ------- - str - The ``stdout`` from the run. - - """ - func_string = dedent(getsource(func_to_run)) - func_string = func_string.replace("@staticmethod\n", "") - func_call_term_strings = [repr(arg) for arg in args] - func_call_term_strings += [f"{name}={repr(val)}" for name, val in kwargs.items()] - func_call_string = ( - f"{func_to_run.__name__}(" + ",".join(func_call_term_strings) + ")" - ) - python_string = "\n".join([func_string, func_call_string]) - - try: - result = run( - [DATA_GEN_PYTHON, "-c", python_string], - capture_output=True, - check=True, - text=True, - ) - except CalledProcessError as error_: - # From None 'breaks' the error chain - we don't want the original - # traceback since it is long and confusing. - raise DataGenerationError(error_.stderr) from None - - return result.stdout - - -@contextmanager -def load_realised(): - """Force NetCDF loading with realised arrays. - - Since passing between data generation and benchmarking environments is via - file loading, but some benchmarks are only meaningful if starting with real - arrays. - """ - from iris.fileformats._nc_load_rules import helpers - from iris.fileformats.netcdf.loader import _get_cf_var_data as pre_patched - - def patched(*args, **kwargs): - return as_concrete_data(pre_patched(*args, **kwargs)) - - netcdf.loader._get_cf_var_data = patched - helpers._get_cf_var_data = patched - yield - netcdf.loader._get_cf_var_data = pre_patched - helpers._get_cf_var_data = pre_patched diff --git a/benchmarks/benchmarks_iris/generate_data/stock.py b/benchmarks/benchmarks_iris/generate_data/stock.py deleted file mode 100644 index 63970cd344d..00000000000 --- a/benchmarks/benchmarks_iris/generate_data/stock.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Wrappers for using :mod:`iris.tests.stock` methods for benchmarking. - -See :mod:`benchmarks.generate_data` for an explanation of this structure. -""" - -from contextlib import nullcontext -from hashlib import sha256 -import json -from pathlib import Path - -import iris -from iris import cube -from iris.mesh import load_mesh - -from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere - - -def hash_args(*args, **kwargs): - """Convert arguments into a short hash - for preserving args in filenames.""" - arg_string = str(args) - kwarg_string = json.dumps(kwargs) - full_string = arg_string + kwarg_string - return sha256(full_string.encode()).hexdigest()[:10] - - -def _create_file__xios_common(func_name, **kwargs): - def _external(func_name_, temp_file_dir, **kwargs_): - from iris.tests.stock import netcdf - - func = getattr(netcdf, func_name_) - print(func(temp_file_dir, **kwargs_), end="") - - args_hash = hash_args(**kwargs) - save_path = (BENCHMARK_DATA / f"{func_name}_{args_hash}").with_suffix(".nc") - if not REUSE_DATA or not save_path.is_file(): - # The xios functions take control of save location so need to move to - # a more specific name that allows reuse. 
- actual_path = run_function_elsewhere( - _external, - func_name_=func_name, - temp_file_dir=str(BENCHMARK_DATA), - **kwargs, - ) - Path(actual_path).replace(save_path) - return save_path - - -def create_file__xios_2d_face_half_levels( - temp_file_dir, dataset_name, n_faces=866, n_times=1 -): - """Create file wrapper for :meth:`iris.tests.stock.netcdf.create_file__xios_2d_face_half_levels`. - - Have taken control of temp_file_dir - - todo: is create_file__xios_2d_face_half_levels still appropriate now we can - properly save Mesh Cubes? - """ - return _create_file__xios_common( - func_name="create_file__xios_2d_face_half_levels", - dataset_name=dataset_name, - n_faces=n_faces, - n_times=n_times, - ) - - -def create_file__xios_3d_face_half_levels( - temp_file_dir, dataset_name, n_faces=866, n_times=1, n_levels=38 -): - """Create file wrapper for :meth:`iris.tests.stock.netcdf.create_file__xios_3d_face_half_levels`. - - Have taken control of temp_file_dir - - todo: is create_file__xios_3d_face_half_levels still appropriate now we can - properly save Mesh Cubes? - """ - return _create_file__xios_common( - func_name="create_file__xios_3d_face_half_levels", - dataset_name=dataset_name, - n_faces=n_faces, - n_times=n_times, - n_levels=n_levels, - ) - - -def sample_mesh(n_nodes=None, n_faces=None, n_edges=None, lazy_values=False): - """Sample mesh wrapper for :meth:iris.tests.stock.mesh.sample_mesh`.""" - - def _external(*args, **kwargs): - from iris.mesh import save_mesh - from iris.tests.stock.mesh import sample_mesh - - save_path_ = kwargs.pop("save_path") - # Always saving, so laziness is irrelevant. Use lazy to save time. - kwargs["lazy_values"] = True - new_mesh = sample_mesh(*args, **kwargs) - save_mesh(new_mesh, save_path_) - - arg_list = [n_nodes, n_faces, n_edges] - args_hash = hash_args(*arg_list) - save_path = (BENCHMARK_DATA / f"sample_mesh_{args_hash}").with_suffix(".nc") - if not REUSE_DATA or not save_path.is_file(): - _ = run_function_elsewhere(_external, *arg_list, save_path=str(save_path)) - if not lazy_values: - # Realise everything. - with load_realised(): - mesh = load_mesh(str(save_path)) - else: - mesh = load_mesh(str(save_path)) - return mesh - - -def sample_meshcoord(sample_mesh_kwargs=None, location="face", axis="x"): - """Sample meshcoord wrapper for :meth:`iris.tests.stock.mesh.sample_meshcoord`. - - Parameters deviate from the original as cannot pass a - :class:`iris.mesh.Mesh to the separate Python instance - must - instead generate the Mesh as well. - - MeshCoords cannot be saved to file, so the _external method saves the - MeshCoord's Mesh, then the original Python instance loads in that Mesh and - regenerates the MeshCoord from there. - """ - - def _external(sample_mesh_kwargs_, save_path_): - from iris.mesh import save_mesh - from iris.tests.stock.mesh import sample_mesh, sample_meshcoord - - if sample_mesh_kwargs_: - input_mesh = sample_mesh(**sample_mesh_kwargs_) - else: - input_mesh = None - # Don't parse the location or axis arguments - only saving the Mesh at - # this stage. 
- new_meshcoord = sample_meshcoord(mesh=input_mesh) - save_mesh(new_meshcoord.mesh, save_path_) - - args_hash = hash_args(**sample_mesh_kwargs) - save_path = (BENCHMARK_DATA / f"sample_mesh_coord_{args_hash}").with_suffix(".nc") - if not REUSE_DATA or not save_path.is_file(): - _ = run_function_elsewhere( - _external, - sample_mesh_kwargs_=sample_mesh_kwargs, - save_path_=str(save_path), - ) - with load_realised(): - source_mesh = load_mesh(str(save_path)) - # Regenerate MeshCoord from its Mesh, which we saved. - return source_mesh.to_MeshCoord(location=location, axis=axis) - - -def realistic_4d_w_everything(w_mesh=False, lazy=False) -> iris.cube.Cube: - """Run :func:`iris.tests.stock.realistic_4d_w_everything` in ``DATA_GEN_PYTHON``. - - Parameters - ---------- - w_mesh : bool - See :func:`iris.tests.stock.realistic_4d_w_everything` for details. - lazy : bool - If True, the Cube will be returned with all arrays as they would - normally be loaded from file (i.e. most will still be lazy Dask - arrays). If False, all arrays (except derived coordinates) will be - realised NumPy arrays. - - """ - - def _external(w_mesh_: str, save_path_: str): - import iris - from iris.tests.stock import realistic_4d_w_everything - - cube = realistic_4d_w_everything(w_mesh=bool(w_mesh_)) - iris.save(cube, save_path_) - - save_path = (BENCHMARK_DATA / f"realistic_4d_w_everything_{w_mesh}").with_suffix( - ".nc" - ) - if not REUSE_DATA or not save_path.is_file(): - _ = run_function_elsewhere(_external, w_mesh_=w_mesh, save_path_=str(save_path)) - context = nullcontext() if lazy else load_realised() - with context: - return iris.load_cube(save_path, "air_potential_temperature") diff --git a/benchmarks/benchmarks_iris/generate_data/ugrid.py b/benchmarks/benchmarks_iris/generate_data/ugrid.py deleted file mode 100644 index 2cef4752eee..00000000000 --- a/benchmarks/benchmarks_iris/generate_data/ugrid.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Scripts for generating supporting data for UGRID-related benchmarking.""" - -from iris import load_cube as iris_loadcube - -from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere -from .stock import ( - create_file__xios_2d_face_half_levels, - create_file__xios_3d_face_half_levels, -) - - -def generate_cube_like_2d_cubesphere(n_cube: int, with_mesh: bool, output_path: str): - """Construct and save to file an LFRIc cubesphere-like cube. - - Construct and save to file an LFRIc cubesphere-like cube for a given - cubesphere size, *or* a simpler structured (UM-like) cube of equivalent - size. - - NOTE: this function is *NEVER* called from within this actual package. - Instead, it is to be called via benchmarks.remote_data_generation, - so that it can use up-to-date facilities, independent of the ASV controlled - environment which contains the "Iris commit under test". - - This means: - - * it must be completely self-contained : i.e. it includes all its - own imports, and saves results to an output file. - - """ - from iris import save - from iris.tests.stock.mesh import sample_mesh, sample_mesh_cube - - n_face_nodes = n_cube * n_cube - n_faces = 6 * n_face_nodes - - # Set n_nodes=n_faces and n_edges=2*n_faces - # : Not exact, but similar to a 'real' cubesphere. 
- n_nodes = n_faces - n_edges = 2 * n_faces - if with_mesh: - mesh = sample_mesh( - n_nodes=n_nodes, n_faces=n_faces, n_edges=n_edges, lazy_values=True - ) - cube = sample_mesh_cube(mesh=mesh, n_z=1) - else: - cube = sample_mesh_cube(nomesh_faces=n_faces, n_z=1) - - # Strip off the 'extra' aux-coord mapping the mesh, which sample-cube adds - # but which we don't want. - cube.remove_coord("mesh_face_aux") - - # Save the result to a named file. - save(cube, output_path) - - -def make_cube_like_2d_cubesphere(n_cube: int, with_mesh: bool): - """Generate an LFRIc cubesphere-like cube. - - Generate an LFRIc cubesphere-like cube for a given cubesphere size, - *or* a simpler structured (UM-like) cube of equivalent size. - - All the cube data, coords and mesh content are LAZY, and produced without - allocating large real arrays (to allow peak-memory testing). - - NOTE: the actual cube generation is done in a stable Iris environment via - benchmarks.remote_data_generation, so it is all channeled via cached netcdf - files in our common testdata directory. - - """ - identifying_filename = f"cube_like_2d_cubesphere_C{n_cube}_Mesh={with_mesh}.nc" - filepath = BENCHMARK_DATA / identifying_filename - if not filepath.exists(): - # Create the required testfile, by running the generation code remotely - # in a 'fixed' python environment. - run_function_elsewhere( - generate_cube_like_2d_cubesphere, - n_cube, - with_mesh=with_mesh, - output_path=str(filepath), - ) - - # File now *should* definitely exist: content is simply the desired cube. - cube = iris_loadcube(str(filepath)) - - # Ensure correct laziness. - _ = cube.data - for coord in cube.coords(mesh_coords=False): - assert not coord.has_lazy_points() - assert not coord.has_lazy_bounds() - if cube.mesh: - for coord in cube.mesh.coords(): - assert coord.has_lazy_points() - for conn in cube.mesh.connectivities(): - assert conn.has_lazy_indices() - - return cube - - -def make_cube_like_umfield(xy_dims): - """Create a "UM-like" cube with lazy content, for save performance testing. - - Roughly equivalent to a single current UM cube, to be compared with - a "make_cube_like_2d_cubesphere(n_cube=_N_CUBESPHERE_UM_EQUIVALENT)" - (see below). - - Note: probably a bit over-simplified, as there is no time coord, but that - is probably equally true of our LFRic-style synthetic data. - - Parameters - ---------- - xy_dims : 2-tuple - Set the horizontal dimensions = n-lats, n-lons. - - """ - - def _external(xy_dims_, save_path_): - from dask import array as da - import numpy as np - - from iris import save - from iris.coords import DimCoord - from iris.cube import Cube - - nz, ny, nx = (1,) + xy_dims_ - - # Base data : Note this is float32 not float64 like LFRic/XIOS outputs. - lazy_data = da.zeros((nz, ny, nx), dtype=np.float32) - cube = Cube(lazy_data, long_name="structured_phenom") - - # Add simple dim coords also. 
- z_dimco = DimCoord(np.arange(nz), long_name="level", units=1) - y_dimco = DimCoord( - np.linspace(-90.0, 90.0, ny), - standard_name="latitude", - units="degrees", - ) - x_dimco = DimCoord( - np.linspace(-180.0, 180.0, nx), - standard_name="longitude", - units="degrees", - ) - for idim, co in enumerate([z_dimco, y_dimco, x_dimco]): - cube.add_dim_coord(co, idim) - - save(cube, save_path_) - - save_path = (BENCHMARK_DATA / f"make_cube_like_umfield_{xy_dims}").with_suffix( - ".nc" - ) - if not REUSE_DATA or not save_path.is_file(): - _ = run_function_elsewhere(_external, xy_dims, str(save_path)) - with load_realised(): - cube = iris_loadcube(str(save_path)) - - return cube - - -def make_cubesphere_testfile(c_size, n_levels=0, n_times=1): - """Build a C cubesphere testfile in a given directory. - - Build a C cubesphere testfile in a given directory, with a standard naming. - If n_levels > 0 specified: 3d file with the specified number of levels. - Return the file path. - - TODO: is create_file__xios... still appropriate now we can properly save Mesh Cubes? - - """ - n_faces = 6 * c_size * c_size - stem_name = f"mesh_cubesphere_C{c_size}_t{n_times}" - kwargs = dict( - temp_file_dir=None, - dataset_name=stem_name, # N.B. function adds the ".nc" extension - n_times=n_times, - n_faces=n_faces, - ) - - three_d = n_levels > 0 - if three_d: - kwargs["n_levels"] = n_levels - kwargs["dataset_name"] += f"_{n_levels}levels" - func = create_file__xios_3d_face_half_levels - else: - func = create_file__xios_2d_face_half_levels - - file_path = func(**kwargs) - return file_path diff --git a/benchmarks/benchmarks_iris/generate_data/um_files.py b/benchmarks/benchmarks_iris/generate_data/um_files.py deleted file mode 100644 index e2bab6b2748..00000000000 --- a/benchmarks/benchmarks_iris/generate_data/um_files.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Generate FF, PP and NetCDF files based on a minimal synthetic FF file. - -NOTE: uses the Mule package, so depends on an environment with Mule installed. -""" - - -def _create_um_files( - len_x: int, len_y: int, len_z: int, len_t: int, compress, save_paths: dict -) -> None: - """Generate an FF object of given shape and compression, save to FF/PP/NetCDF. - - This is run externally - (:func:`benchmarks.generate_data.run_function_elsewhere`), so all imports - are self-contained and input parameters are simple types. 
- """ - from copy import deepcopy - from datetime import datetime - from tempfile import NamedTemporaryFile - - from mule import ArrayDataProvider, Field3, FieldsFile - import mule.ff - from mule.pp import fields_to_pp_file - import numpy as np - - from iris import load_cube - from iris import save as save_cube - - def to_bytes_patch(self, field): - data = field.get_data() - dtype = mule.ff._DATA_DTYPES[self.WORD_SIZE][field.lbuser1] - data = data.astype(dtype) - return data.tobytes(), data.size - - # TODO: remove this patch when fixed in mule, see https://github.com/MetOffice/simulation-systems/discussions/389 - mule.ff._WriteFFOperatorUnpacked.to_bytes = to_bytes_patch - - template = { - "fixed_length_header": {"dataset_type": 3, "grid_staggering": 3}, - "integer_constants": { - "num_p_levels": len_z, - "num_cols": len_x, - "num_rows": len_y, - }, - "real_constants": {}, - "level_dependent_constants": {"dims": (len_z + 1, None)}, - } - new_ff = FieldsFile.from_template(deepcopy(template)) - - data_array = np.arange(len_x * len_y).reshape(len_x, len_y) - array_provider = ArrayDataProvider(data_array) - - def add_field(level_: int, time_step_: int) -> None: - """Add a minimal field to the new :class:`~mule.FieldsFile`. - - Includes the minimum information to allow Mule saving and Iris - loading, as well as incrementation for vertical levels and time - steps to allow generation of z and t dimensions. - """ - new_field = Field3.empty() - # To correspond to the header-release 3 class used. - new_field.lbrel = 3 - # Mule uses the first element of the lookup to test for - # unpopulated fields (and skips them), so the first element should - # be set to something. The year will do. - new_field.raw[1] = datetime.now().year - - # Horizontal. - new_field.lbcode = 1 - new_field.lbnpt = len_x - new_field.lbrow = len_y - new_field.bdx = new_ff.real_constants.col_spacing - new_field.bdy = new_ff.real_constants.row_spacing - new_field.bzx = new_ff.real_constants.start_lon - 0.5 * new_field.bdx - new_field.bzy = new_ff.real_constants.start_lat - 0.5 * new_field.bdy - - # Hemisphere. - new_field.lbhem = 32 - # Processing. - new_field.lbproc = 0 - - # Vertical. - # Hybrid height values by simulating sequences similar to those in a - # theta file. - new_field.lbvc = 65 - if level_ == 0: - new_field.lblev = 9999 - else: - new_field.lblev = level_ - - level_1 = level_ + 1 - six_rec = 20 / 3 - three_rec = six_rec / 2 - - new_field.blev = level_1**2 * six_rec - six_rec - new_field.brsvd1 = level_1**2 * six_rec + (six_rec * level_1) - three_rec - - brsvd2_simulated = np.linspace(0.995, 0, len_z) - shift = min(len_z, 2) - bhrlev_simulated = np.concatenate([np.ones(shift), brsvd2_simulated[:-shift]]) - new_field.brsvd2 = brsvd2_simulated[level_] - new_field.bhrlev = bhrlev_simulated[level_] - - # Time. - new_field.lbtim = 11 - - new_field.lbyr = time_step_ - for attr_name in ["lbmon", "lbdat", "lbhr", "lbmin", "lbsec"]: - setattr(new_field, attr_name, 0) - - new_field.lbyrd = time_step_ + 1 - for attr_name in ["lbmond", "lbdatd", "lbhrd", "lbmind", "lbsecd"]: - setattr(new_field, attr_name, 0) - - # Data and packing. 
- new_field.lbuser1 = 1 - new_field.lbpack = int(compress) - new_field.bacc = 0 - new_field.bmdi = -1 - new_field.lbext = 0 - new_field.set_data_provider(array_provider) - - new_ff.fields.append(new_field) - - for time_step in range(len_t): - for level in range(len_z): - add_field(level, time_step + 1) - - ff_path = save_paths.get("FF", None) - pp_path = save_paths.get("PP", None) - nc_path = save_paths.get("NetCDF", None) - - if ff_path: - new_ff.to_file(ff_path) - if pp_path: - fields_to_pp_file(str(pp_path), new_ff.fields) - if nc_path: - temp_ff_path = None - # Need an Iris Cube from the FF content. - if ff_path: - # Use the existing file. - ff_cube = load_cube(ff_path) - else: - # Make a temporary file. - temp_ff_path = NamedTemporaryFile() - new_ff.to_file(temp_ff_path.name) - ff_cube = load_cube(temp_ff_path.name) - - save_cube(ff_cube, nc_path, zlib=compress) - if temp_ff_path: - temp_ff_path.close() - - -FILE_EXTENSIONS = {"FF": "", "PP": ".pp", "NetCDF": ".nc"} - - -def create_um_files( - len_x: int, - len_y: int, - len_z: int, - len_t: int, - compress: bool, - file_types: list, -) -> dict: - """Generate FF-based FF / PP / NetCDF files with specified shape and compression. - - All files representing a given shape are saved in a dedicated directory. A - dictionary of the saved paths is returned. - - If the required files exist, they are re-used, unless - :const:`benchmarks.REUSE_DATA` is ``False``. - """ - # Self contained imports to avoid linting confusion with _create_um_files(). - from . import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere - - save_name_sections = ["UM", len_x, len_y, len_z, len_t] - save_name = "_".join(str(section) for section in save_name_sections) - save_dir = BENCHMARK_DATA / save_name - if not save_dir.is_dir(): - save_dir.mkdir(parents=True) - - save_paths = {} - files_exist = True - for file_type in file_types: - file_ext = FILE_EXTENSIONS[file_type] - save_path = (save_dir / f"{compress}").with_suffix(file_ext) - files_exist = files_exist and save_path.is_file() - save_paths[file_type] = str(save_path) - - if not REUSE_DATA or not files_exist: - _ = run_function_elsewhere( - _create_um_files, len_x, len_y, len_z, len_t, compress, save_paths - ) - - return save_paths diff --git a/benchmarks/benchmarks_iris/import_iris.py b/benchmarks/benchmarks_iris/import_iris.py deleted file mode 100644 index ff5f19e4211..00000000000 --- a/benchmarks/benchmarks_iris/import_iris.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. - -"""Import iris benchmarking.""" - -from importlib import import_module, reload - -################ -# Prepare info for reset_colormaps: - -# Import and capture colormaps. -from matplotlib import colormaps # isort:skip - -_COLORMAPS_ORIG = set(colormaps) - -# Import iris.palette, which modifies colormaps. -import iris.palette - -# Derive which colormaps have been added by iris.palette. -_COLORMAPS_MOD = set(colormaps) -COLORMAPS_EXTRA = _COLORMAPS_MOD - _COLORMAPS_ORIG - -# Touch iris.palette to prevent linters complaining. -_ = iris.palette - -################ - - -class Iris: - @staticmethod - def _import(module_name, reset_colormaps=False): - """Have experimented with adding sleep() commands into the imported modules. 
- - The results reveal: - - ASV avoids invoking `import x` if nothing gets called in the - benchmark (some imports were timed, but only those where calls - happened during import). - - Using reload() is not identical to importing, but does produce - results that are very close to expected import times, so this is fine - for monitoring for regressions. - It is also ideal for accurate repetitions, without the need to mess - with the ASV `number` attribute etc, since cached imports are not used - and the repetitions are therefore no faster than the first run. - """ - mod = import_module(module_name) - - if reset_colormaps: - # Needed because reload() will attempt to register new colormaps a - # second time, which errors by default. - for cm_name in COLORMAPS_EXTRA: - colormaps.unregister(cm_name) - - reload(mod) - - def time_iris(self): - self._import("iris") - - def time__concatenate(self): - self._import("iris._concatenate") - - def time__constraints(self): - self._import("iris._constraints") - - def time__data_manager(self): - self._import("iris._data_manager") - - def time__deprecation(self): - self._import("iris._deprecation") - - def time__lazy_data(self): - self._import("iris._lazy_data") - - def time__merge(self): - self._import("iris._merge") - - def time__representation(self): - self._import("iris._representation") - - def time_analysis(self): - self._import("iris.analysis") - - def time_analysis__area_weighted(self): - self._import("iris.analysis._area_weighted") - - def time_analysis__grid_angles(self): - self._import("iris.analysis._grid_angles") - - def time_analysis__interpolation(self): - self._import("iris.analysis._interpolation") - - def time_analysis__regrid(self): - self._import("iris.analysis._regrid") - - def time_analysis__scipy_interpolate(self): - self._import("iris.analysis._scipy_interpolate") - - def time_analysis_calculus(self): - self._import("iris.analysis.calculus") - - def time_analysis_cartography(self): - self._import("iris.analysis.cartography") - - def time_analysis_geomerty(self): - self._import("iris.analysis.geometry") - - def time_analysis_maths(self): - self._import("iris.analysis.maths") - - def time_analysis_stats(self): - self._import("iris.analysis.stats") - - def time_analysis_trajectory(self): - self._import("iris.analysis.trajectory") - - def time_aux_factory(self): - self._import("iris.aux_factory") - - def time_common(self): - self._import("iris.common") - - def time_common_lenient(self): - self._import("iris.common.lenient") - - def time_common_metadata(self): - self._import("iris.common.metadata") - - def time_common_mixin(self): - self._import("iris.common.mixin") - - def time_common_resolve(self): - self._import("iris.common.resolve") - - def time_config(self): - self._import("iris.config") - - def time_coord_categorisation(self): - self._import("iris.coord_categorisation") - - def time_coord_systems(self): - self._import("iris.coord_systems") - - def time_coords(self): - self._import("iris.coords") - - def time_cube(self): - self._import("iris.cube") - - def time_exceptions(self): - self._import("iris.exceptions") - - def time_experimental(self): - self._import("iris.experimental") - - def time_fileformats(self): - self._import("iris.fileformats") - - def time_fileformats__ff(self): - self._import("iris.fileformats._ff") - - def time_fileformats__ff_cross_references(self): - self._import("iris.fileformats._ff_cross_references") - - def time_fileformats__pp_lbproc_pairs(self): - self._import("iris.fileformats._pp_lbproc_pairs") - - def 
time_fileformats_structured_array_identification(self): - self._import("iris.fileformats._structured_array_identification") - - def time_fileformats_abf(self): - self._import("iris.fileformats.abf") - - def time_fileformats_cf(self): - self._import("iris.fileformats.cf") - - def time_fileformats_dot(self): - self._import("iris.fileformats.dot") - - def time_fileformats_name(self): - self._import("iris.fileformats.name") - - def time_fileformats_name_loaders(self): - self._import("iris.fileformats.name_loaders") - - def time_fileformats_netcdf(self): - self._import("iris.fileformats.netcdf") - - def time_fileformats_nimrod(self): - self._import("iris.fileformats.nimrod") - - def time_fileformats_nimrod_load_rules(self): - self._import("iris.fileformats.nimrod_load_rules") - - def time_fileformats_pp(self): - self._import("iris.fileformats.pp") - - def time_fileformats_pp_load_rules(self): - self._import("iris.fileformats.pp_load_rules") - - def time_fileformats_pp_save_rules(self): - self._import("iris.fileformats.pp_save_rules") - - def time_fileformats_rules(self): - self._import("iris.fileformats.rules") - - def time_fileformats_um(self): - self._import("iris.fileformats.um") - - def time_fileformats_um__fast_load(self): - self._import("iris.fileformats.um._fast_load") - - def time_fileformats_um__fast_load_structured_fields(self): - self._import("iris.fileformats.um._fast_load_structured_fields") - - def time_fileformats_um__ff_replacement(self): - self._import("iris.fileformats.um._ff_replacement") - - def time_fileformats_um__optimal_array_structuring(self): - self._import("iris.fileformats.um._optimal_array_structuring") - - def time_fileformats_um_cf_map(self): - self._import("iris.fileformats.um_cf_map") - - def time_io(self): - self._import("iris.io") - - def time_io_format_picker(self): - self._import("iris.io.format_picker") - - def time_iterate(self): - self._import("iris.iterate") - - def time_palette(self): - self._import("iris.palette", reset_colormaps=True) - - def time_plot(self): - self._import("iris.plot") - - def time_quickplot(self): - self._import("iris.quickplot") - - def time_std_names(self): - self._import("iris.std_names") - - def time_symbols(self): - self._import("iris.symbols") - - def time_tests(self): - self._import("iris.tests") - - def time_time(self): - self._import("iris.time") - - def time_util(self): - self._import("iris.util") - - # third-party imports - - def time_third_party_cartopy(self): - self._import("cartopy") - - def time_third_party_cf_units(self): - self._import("cf_units") - - def time_third_party_cftime(self): - self._import("cftime") - - def time_third_party_matplotlib(self): - self._import("matplotlib") - - def time_third_party_numpy(self): - self._import("numpy") - - def time_third_party_scipy(self): - self._import("scipy") diff --git a/benchmarks/benchmarks_iris/iterate.py b/benchmarks/benchmarks_iris/iterate.py deleted file mode 100644 index 664bcf8ba2c..00000000000 --- a/benchmarks/benchmarks_iris/iterate.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""Iterate benchmark tests.""" - -import numpy as np - -from iris import coords, cube, iterate - - -class IZip: - def setup(self): - data_2d = np.zeros((1000,) * 2) - data_1d = data_2d[0] - local_cube = cube.Cube(data_2d) - coord_a = coords.AuxCoord(points=data_1d, long_name="a") - coord_b = coords.AuxCoord(points=data_1d, long_name="b") - self.coord_names = (coord.long_name for coord in (coord_a, coord_b)) - - local_cube.add_aux_coord(coord_a, 0) - local_cube.add_aux_coord(coord_b, 1) - self.cube = local_cube - - def time_izip(self): - iterate.izip(self.cube, coords=self.coord_names) diff --git a/benchmarks/benchmarks_iris/load/__init__.py b/benchmarks/benchmarks_iris/load/__init__.py deleted file mode 100644 index 5c5a62a5151..00000000000 --- a/benchmarks/benchmarks_iris/load/__init__.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""File loading benchmark tests.""" - -from iris import AttributeConstraint, Constraint, load, load_cube -from iris.cube import Cube -from iris.fileformats.um import structured_um_loading - -from ..generate_data import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere -from ..generate_data.um_files import create_um_files - - -class LoadAndRealise: - # For data generation - timeout = 600.0 - params = ( - [(50, 50, 2), (1280, 960, 5), (2, 2, 1000)], - [False, True], - ["FF", "PP", "NetCDF"], - ) - param_names = ["xyz", "compressed", "file_format"] - - def setup_cache(self) -> dict: - file_type_args = self.params[2] - file_path_dict: dict[tuple[int, int, int], dict[bool, dict[str, str]]] = {} - for xyz in self.params[0]: - file_path_dict[xyz] = {} - x, y, z = xyz - for compress in self.params[1]: - file_path_dict[xyz][compress] = create_um_files( - x, y, z, 1, compress, file_type_args - ) - return file_path_dict - - def setup( - self, - file_path_dict: dict, - xyz: tuple, - compress: bool, - file_format: str, - ) -> None: - self.file_path = file_path_dict[xyz][compress][file_format] - self.cube = self.load() - - def load(self) -> Cube: - return load_cube(self.file_path) - - def time_load(self, _, __, ___, ____) -> None: - _ = self.load() - - def time_realise(self, _, __, ___, ____) -> None: - # Don't touch cube.data - permanent realisation plays badly with ASV's - # re-run strategy. - assert self.cube.has_lazy_data() - self.cube.core_data().compute() - - -class STASHConstraint: - # xyz sizes mimic LoadAndRealise to maximise file reuse. 
- params = ([(2, 2, 2), (1280, 960, 5), (2, 2, 1000)], ["FF", "PP"]) - param_names = ["xyz", "file_format"] - - def setup_cache(self) -> dict: - file_type_args = self.params[1] - file_path_dict = {} - for xyz in self.params[0]: - x, y, z = xyz - file_path_dict[xyz] = create_um_files(x, y, z, 1, False, file_type_args) - return file_path_dict - - def setup(self, file_path_dict: dict, xyz: tuple, file_format: str) -> None: - self.file_path = file_path_dict[xyz][file_format] - - def time_stash_constraint(self, _, __, ___) -> None: - _ = load_cube(self.file_path, AttributeConstraint(STASH="m??s??i901")) - - -class TimeConstraint: - params = ([3, 20], ["FF", "PP", "NetCDF"]) - param_names = ["time_dim_len", "file_format"] - - def setup_cache(self) -> dict: - file_type_args = self.params[1] - file_path_dict = {} - for time_dim_len in self.params[0]: - file_path_dict[time_dim_len] = create_um_files( - 20, 20, 5, time_dim_len, False, file_type_args - ) - return file_path_dict - - def setup(self, file_path_dict: dict, time_dim_len: int, file_format: str) -> None: - self.file_path = file_path_dict[time_dim_len][file_format] - self.time_constr = Constraint(time=lambda cell: cell.point.year < 3) - - def time_time_constraint(self, _, __, ___) -> None: - _ = load_cube(self.file_path, self.time_constr) - - -class ManyVars: - FILE_PATH = BENCHMARK_DATA / "many_var_file.nc" - - @staticmethod - def _create_file(save_path: str) -> None: - """Run externally - everything must be self-contained.""" - import numpy as np - - from iris import save - from iris.coords import AuxCoord - from iris.cube import Cube - - data_len = 8 - data = np.arange(data_len) - cube = Cube(data, units="unknown") - extra_vars = 80 - names = ["coord_" + str(i) for i in range(extra_vars)] - for name in names: - coord = AuxCoord(data, long_name=name, units="unknown") - cube.add_aux_coord(coord, 0) - save(cube, save_path) - - def setup_cache(self) -> None: - if not REUSE_DATA or not self.FILE_PATH.is_file(): - # See :mod:`benchmarks.generate_data` docstring for full explanation. - _ = run_function_elsewhere( - self._create_file, - str(self.FILE_PATH), - ) - - def time_many_var_load(self) -> None: - _ = load(str(self.FILE_PATH)) - - -class ManyCubes: - FILE_PATH = BENCHMARK_DATA / "many_cube_file.nc" - - @staticmethod - def _create_file(save_path: str) -> None: - """Run externally - everything must be self-contained.""" - import numpy as np - - from iris import save - from iris.coords import AuxCoord, DimCoord - from iris.cube import Cube, CubeList - - data_len = 81920 - bnds_len = 3 - data = np.arange(data_len).astype(np.float32) - bnds_data = ( - np.arange(data_len * bnds_len) - .astype(np.float32) - .reshape(data_len, bnds_len) - ) - time = DimCoord(np.array([0]), standard_name="time") - lat = AuxCoord( - data, bounds=bnds_data, standard_name="latitude", units="degrees" - ) - lon = AuxCoord( - data, bounds=bnds_data, standard_name="longitude", units="degrees" - ) - cube = Cube(data.reshape(1, -1), units="unknown") - cube.add_dim_coord(time, 0) - cube.add_aux_coord(lat, 1) - cube.add_aux_coord(lon, 1) - - n_cubes = 100 - cubes = CubeList() - for i in range(n_cubes): - cube = cube.copy() - cube.long_name = f"var_{i}" - cubes.append(cube) - save(cubes, save_path) - - def setup_cache(self) -> None: - if not REUSE_DATA or not self.FILE_PATH.is_file(): - # See :mod:`benchmarks.generate_data` docstring for full explanation. 
- _ = run_function_elsewhere( - self._create_file, - str(self.FILE_PATH), - ) - - def time_many_cube_load(self) -> None: - _ = load(str(self.FILE_PATH)) - - -class StructuredFF: - """Test structured loading of a large-ish fieldsfile. - - Structured load of the larger size should show benefit over standard load, - avoiding the cost of merging. - """ - - params = ([(2, 2, 2), (1280, 960, 5), (2, 2, 1000)], [False, True]) - param_names = ["xyz", "structured_loading"] - - def setup_cache(self) -> dict: - file_path_dict = {} - for xyz in self.params[0]: - x, y, z = xyz - file_path_dict[xyz] = create_um_files(x, y, z, 1, False, ["FF"]) - return file_path_dict - - def setup(self, file_path_dict, xyz, structured_load): - self.file_path = file_path_dict[xyz]["FF"] - self.structured_load = structured_load - - def load(self): - """Load the whole file (in fact there is only 1 cube).""" - - def _load(): - _ = load(self.file_path) - - if self.structured_load: - with structured_um_loading(): - _load() - else: - _load() - - def time_structured_load(self, _, __, ___): - self.load() diff --git a/benchmarks/benchmarks_iris/load/ugrid.py b/benchmarks/benchmarks_iris/load/ugrid.py deleted file mode 100644 index 5ad0086ef36..00000000000 --- a/benchmarks/benchmarks_iris/load/ugrid.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Mesh data loading benchmark tests.""" - -from iris import load_cube as iris_load_cube -from iris.mesh import load_mesh as iris_load_mesh - -from ..generate_data.stock import create_file__xios_2d_face_half_levels - - -def synthetic_data(**kwargs): - # Ensure all uses of the synthetic data function use the common directory. - # File location is controlled by :mod:`generate_data`, hence temp_file_dir=None. - return create_file__xios_2d_face_half_levels(temp_file_dir=None, **kwargs) - - -def load_cube(*args, **kwargs): - return iris_load_cube(*args, **kwargs) - - -def load_mesh(*args, **kwargs): - return iris_load_mesh(*args, **kwargs) - - -class BasicLoading: - params = [1, int(2e5)] - param_names = ["number of faces"] - - def setup_common(self, **kwargs): - self.data_path = synthetic_data(**kwargs) - - def setup(self, *args): - self.setup_common(dataset_name="Loading", n_faces=args[0]) - - def time_load_file(self, *args): - _ = load_cube(str(self.data_path)) - - def time_load_mesh(self, *args): - _ = load_mesh(str(self.data_path)) - - -class BasicLoadingTime(BasicLoading): - """Same as BasicLoading, but scaling over a time series - an unlimited dimension.""" - - # NOTE iris#4834 - careful how big the time dimension is (time dimension - # is UNLIMITED). - - param_names = ["number of time steps"] - - def setup(self, *args): - self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0]) - - -class DataRealisation: - # Prevent repeat runs between setup() runs - data won't be lazy after 1st. - number = 1 - # Compensate for reduced certainty by increasing number of repeats. - repeat = (10, 10, 10.0) - # Prevent ASV running its warmup, which ignores `number` and would - # therefore get a false idea of typical run time since the data would stop - # being lazy. 
- warmup_time = 0.0 - timeout = 300.0 - - params = [int(1e4), int(2e5)] - param_names = ["number of faces"] - - def setup_common(self, **kwargs): - data_path = synthetic_data(**kwargs) - self.cube = load_cube(str(data_path)) - - def setup(self, *args): - self.setup_common(dataset_name="Realisation", n_faces=args[0]) - - def time_realise_data(self, *args): - assert self.cube.has_lazy_data() - _ = self.cube.data[0] - - -class DataRealisationTime(DataRealisation): - """Same as DataRealisation, but scaling over a time series - an unlimited dimension.""" - - param_names = ["number of time steps"] - - def setup(self, *args): - self.setup_common(dataset_name="Realisation", n_faces=1, n_times=args[0]) - - -class Callback: - params = [1, int(2e5)] - param_names = ["number of faces"] - - def setup_common(self, **kwargs): - def callback(cube, field, filename): - return cube[::2] - - self.data_path = synthetic_data(**kwargs) - self.callback = callback - - def setup(self, *args): - self.setup_common(dataset_name="Loading", n_faces=args[0]) - - def time_load_file_callback(self, *args): - _ = load_cube(str(self.data_path), callback=self.callback) - - -class CallbackTime(Callback): - """Same as Callback, but scaling over a time series - an unlimited dimension.""" - - param_names = ["number of time steps"] - - def setup(self, *args): - self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0]) diff --git a/benchmarks/benchmarks_iris/merge_concat.py b/benchmarks/benchmarks_iris/merge_concat.py deleted file mode 100644 index 2d3738683ad..00000000000 --- a/benchmarks/benchmarks_iris/merge_concat.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``.""" - -import warnings - -import numpy as np - -from iris.cube import CubeList -from iris.warnings import IrisVagueMetadataWarning - -from .generate_data.stock import realistic_4d_w_everything - - -class Merge: - # TODO: Improve coverage. - - cube_list: CubeList - - def setup(self): - source_cube = realistic_4d_w_everything() - - # Merge does not yet fully support cell measures and ancillary variables. - for cm in source_cube.cell_measures(): - source_cube.remove_cell_measure(cm) - for av in source_cube.ancillary_variables(): - source_cube.remove_ancillary_variable(av) - - second_cube = source_cube.copy() - scalar_coord = second_cube.coords(dimensions=[])[0] - scalar_coord.points = scalar_coord.points + 1 - self.cube_list = CubeList([source_cube, second_cube]) - - def time_merge(self): - _ = self.cube_list.merge_cube() - - def tracemalloc_merge(self): - _ = self.cube_list.merge_cube() - - tracemalloc_merge.number = 3 # type: ignore[attr-defined] - - -class Concatenate: - # TODO: Improve coverage. 
- - cube_list: CubeList - - params = [[False, True]] - param_names = ["Lazy operations"] - - def setup(self, lazy_run: bool): - warnings.filterwarnings("ignore", message="Ignoring a datum") - warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning) - source_cube = realistic_4d_w_everything(lazy=lazy_run) - self.cube_list = CubeList([source_cube]) - for _ in range(24): - next_cube = self.cube_list[-1].copy() - first_dim_coord = next_cube.coord(dimensions=0, dim_coords=True) - first_dim_coord.points = ( - first_dim_coord.points + np.ptp(first_dim_coord.points) + 1 - ) - self.cube_list.append(next_cube) - - def time_concatenate(self, _): - _ = self.cube_list.concatenate_cube() - - def tracemalloc_concatenate(self, _): - _ = self.cube_list.concatenate_cube() - - tracemalloc_concatenate.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/mesh/__init__.py b/benchmarks/benchmarks_iris/mesh/__init__.py deleted file mode 100644 index 9cc76ce0aa4..00000000000 --- a/benchmarks/benchmarks_iris/mesh/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmark tests for the iris.mesh module.""" diff --git a/benchmarks/benchmarks_iris/mesh/utils/__init__.py b/benchmarks/benchmarks_iris/mesh/utils/__init__.py deleted file mode 100644 index e20973c0a78..00000000000 --- a/benchmarks/benchmarks_iris/mesh/utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmark tests for the iris.mesh.utils module.""" diff --git a/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py b/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py deleted file mode 100644 index a61deea56d3..00000000000 --- a/benchmarks/benchmarks_iris/mesh/utils/regions_combine.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmarks stages of operation. - -Benchmarks stages of operation of the function -:func:`iris.mesh.utils.recombine_submeshes`. - -""" - -import os - -import dask.array as da -import numpy as np - -from iris import load, load_cube, save -from iris.mesh.utils import recombine_submeshes - -from ...generate_data.ugrid import make_cube_like_2d_cubesphere - - -class MixinCombineRegions: - # Characterise time taken + memory-allocated, for various stages of combine - # operations on cubesphere-like test data. - params = [50, 500] - param_names = ["cubesphere-N"] - - def _parametrised_cache_filename(self, n_cubesphere, content_name): - return f"cube_C{n_cubesphere}_{content_name}.nc" - - def _make_region_cubes(self, full_mesh_cube): - """Make a fixed number of region cubes from a full meshcube.""" - # Divide the cube into regions. 
- n_faces = full_mesh_cube.shape[-1] - # Start with a simple list of face indices - # first extend to multiple of 5 - n_faces_5s = 5 * ((n_faces + 1) // 5) - i_faces = np.arange(n_faces_5s, dtype=int) - # reshape (5N,) to (N, 5) - i_faces = i_faces.reshape((n_faces_5s // 5, 5)) - # reorder [2, 3, 4, 0, 1] within each block of 5 - i_faces = np.concatenate([i_faces[:, 2:], i_faces[:, :2]], axis=1) - # flatten to get [2 3 4 0 1 (-) 8 9 10 6 7 (-) 13 14 15 11 12 ...] - i_faces = i_faces.flatten() - # reduce back to original length, wrap any overflows into valid range - i_faces = i_faces[:n_faces] % n_faces - - # Divide into regions -- always slightly uneven, since 7 doesn't divide - n_regions = 7 - n_facesperregion = n_faces // n_regions - i_face_regions = (i_faces // n_facesperregion) % n_regions - region_inds = [ - np.where(i_face_regions == i_region)[0] for i_region in range(n_regions) - ] - # NOTE: this produces 7 regions, with near-adjacent value ranges but - # with some points "moved" to an adjacent region. - # Also, region-0 is bigger (because of not dividing by 7). - - # Finally, make region cubes with these indices. - region_cubes = [full_mesh_cube[..., inds] for inds in region_inds] - return region_cubes - - def setup_cache(self): - """Cache all the necessary source data on disk.""" - # Control dask, to minimise memory usage + allow largest data. - self.fix_dask_settings() - - for n_cubesphere in self.params: - # Do for each parameter, since "setup_cache" is NOT parametrised - mesh_cube = make_cube_like_2d_cubesphere( - n_cube=n_cubesphere, with_mesh=True - ) - # Save to files which include the parameter in the names. - save( - mesh_cube, - self._parametrised_cache_filename(n_cubesphere, "meshcube"), - ) - region_cubes = self._make_region_cubes(mesh_cube) - save( - region_cubes, - self._parametrised_cache_filename(n_cubesphere, "regioncubes"), - ) - - def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=True): - """Combine tests "standard" setup operation. - - Load the source cubes (full-mesh + region) from disk. - These are specific to the cubesize parameter. - The data is cached on disk rather than calculated, to avoid any - pre-loading of the process memory allocation. - - If 'imaginary_data' is set (default), the region cubes data is replaced - with lazy data in the form of a da.zeros(). Otherwise, the region data - is lazy data from the files. - - If 'create_result_cube' is set, create "self.combined_cube" containing - the (still lazy) result. - - NOTE: various test classes override + extend this. - - """ - # Load source cubes (full-mesh and regions) - self.full_mesh_cube = load_cube( - self._parametrised_cache_filename(n_cubesphere, "meshcube") - ) - self.region_cubes = load( - self._parametrised_cache_filename(n_cubesphere, "regioncubes") - ) - - # Remove all var-names from loaded cubes, which can otherwise cause - # problems. Also implement 'imaginary' data. - for cube in self.region_cubes + [self.full_mesh_cube]: - cube.var_name = None - for coord in cube.coords(): - coord.var_name = None - if imaginary_data: - # Replace cube data (lazy file data) with 'imaginary' data. - # This has the same lazy-array attributes, but is allocated by - # creating chunks on demand instead of loading from file. - data = cube.lazy_data() - data = da.zeros(data.shape, dtype=data.dtype, chunks=data.chunksize) - cube.data = data - - if create_result_cube: - self.recombined_cube = self.recombine() - - # Fix dask usage mode for all the subsequent performance tests. 
- self.fix_dask_settings() - - def fix_dask_settings(self): - """Fix "standard" dask behaviour for time+space testing. - - Currently this is single-threaded mode, with known chunksize, - which is optimised for space saving so we can test largest data. - - """ - import dask.config as dcfg - - # Use single-threaded, to avoid process-switching costs and minimise memory usage. - # N.B. generally may be slower, but use less memory ? - dcfg.set(scheduler="single-threaded") - # Configure iris._lazy_data.as_lazy_data to aim for 100Mb chunks - dcfg.set({"array.chunk-size": "128Mib"}) - - def recombine(self): - # A handy general shorthand for the main "combine" operation. - result = recombine_submeshes( - self.full_mesh_cube, - self.region_cubes, - index_coord_name="i_mesh_face", - ) - return result - - -class CombineRegionsCreateCube(MixinCombineRegions): - """Time+memory costs of creating a combined-regions cube. - - The result is lazy, and we don't do the actual calculation. - - """ - - def setup(self, n_cubesphere): - # In this case only, do *not* create the result cube. - # That is the operation we want to test. - super().setup(n_cubesphere, create_result_cube=False) - - def time_create_combined_cube(self, n_cubesphere): - self.recombine() - - def tracemalloc_create_combined_cube(self, n_cubesphere): - self.recombine() - - -class CombineRegionsComputeRealData(MixinCombineRegions): - """Time+memory costs of computing combined-regions data.""" - - def time_compute_data(self, n_cubesphere): - _ = self.recombined_cube.data - - def tracemalloc_compute_data(self, n_cubesphere): - _ = self.recombined_cube.data - - -class CombineRegionsSaveData(MixinCombineRegions): - """Test saving *only*. - - Test saving *only*, having replaced the input cube data with 'imaginary' - array data, so that input data is not loaded from disk during the save - operation. - - - """ - - def time_save(self, n_cubesphere): - # Save to disk, which must compute data + stream it to file. - save(self.recombined_cube, "tmp.nc") - - def tracemalloc_save(self, n_cubesphere): - save(self.recombined_cube, "tmp.nc") - - def track_filesize_saved(self, n_cubesphere): - save(self.recombined_cube, "tmp.nc") - return os.path.getsize("tmp.nc") * 1.0e-6 - - -CombineRegionsSaveData.track_filesize_saved.unit = "Mb" # type: ignore[attr-defined] - - -class CombineRegionsFileStreamedCalc(MixinCombineRegions): - """Test the whole cost of file-to-file streaming. - - Uses the combined cube which is based on lazy data loading from the region - cubes on disk. - """ - - def setup(self, n_cubesphere): - # In this case only, do *not* replace the loaded regions data with - # 'imaginary' data, as we want to test file-to-file calculation+save. - super().setup(n_cubesphere, imaginary_data=False) - - def time_stream_file2file(self, n_cubesphere): - # Save to disk, which must compute data + stream it to file. - save(self.recombined_cube, "tmp.nc") - - def tracemalloc_stream_file2file(self, n_cubesphere): - save(self.recombined_cube, "tmp.nc") diff --git a/benchmarks/benchmarks_iris/plot.py b/benchmarks/benchmarks_iris/plot.py deleted file mode 100644 index e8fbb5372d7..00000000000 --- a/benchmarks/benchmarks_iris/plot.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""Plot benchmark tests.""" - -import matplotlib as mpl -import numpy as np - -from iris import coords, cube, plot - -mpl.use("agg") - - -class AuxSort: - def setup(self): - # Manufacture data from which contours can be derived. - # Should generate 10 distinct contours, regardless of dim size. - dim_size = 200 - repeat_number = int(dim_size / 10) - repeat_range = range(int((dim_size**2) / repeat_number)) - data = np.repeat(repeat_range, repeat_number) - data = data.reshape((dim_size,) * 2) - - # These benchmarks are from a user perspective, so setting up a - # user-level case that will prompt the calling of aux_coords.sort in plot.py. - dim_coord = coords.DimCoord(np.arange(dim_size)) - local_cube = cube.Cube(data) - local_cube.add_aux_coord(dim_coord, 0) - self.cube = local_cube - - def time_aux_sort(self): - # Contour plot arbitrarily picked. Known to prompt aux_coords.sort. - plot.contour(self.cube) diff --git a/benchmarks/benchmarks_iris/regridding.py b/benchmarks/benchmarks_iris/regridding.py deleted file mode 100644 index e227da0ec69..00000000000 --- a/benchmarks/benchmarks_iris/regridding.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Regridding benchmark test.""" - -# import iris tests first so that some things can be initialised before -# importing anything else -from iris import tests # isort:skip - -import numpy as np - -import iris -from iris.analysis import AreaWeighted, PointInCell -from iris.coords import AuxCoord - - -class HorizontalChunkedRegridding: - def setup(self) -> None: - # Prepare a cube and a template - - cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) - self.cube = iris.load_cube(cube_file_path) - - # Prepare a tougher cube and chunk it - chunked_cube_file_path = tests.get_data_path( - ["NetCDF", "regrid", "regrid_xyt.nc"] - ) - self.chunked_cube = iris.load_cube(chunked_cube_file_path) - - # Chunked data makes the regridder run repeatedly - self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) - - template_file_path = tests.get_data_path( - ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] - ) - self.template_cube = iris.load_cube(template_file_path) - - # Prepare a regridding scheme - self.scheme_area_w = AreaWeighted() - - def time_regrid_area_w(self) -> None: - # Regrid the cube onto the template. 
- out = self.cube.regrid(self.template_cube, self.scheme_area_w) - # Realise the data - out.data - - def time_regrid_area_w_new_grid(self) -> None: - # Regrid the chunked cube - out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w) - # Realise data - out.data - - def tracemalloc_regrid_area_w(self) -> None: - # Regrid the chunked cube - out = self.cube.regrid(self.template_cube, self.scheme_area_w) - # Realise data - out.data - - tracemalloc_regrid_area_w.number = 3 # type: ignore[attr-defined] - - def tracemalloc_regrid_area_w_new_grid(self) -> None: - # Regrid the chunked cube - out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w) - # Realise data - out.data - - tracemalloc_regrid_area_w_new_grid.number = 3 # type: ignore[attr-defined] - - -class CurvilinearRegridding: - def setup(self) -> None: - # Prepare a cube and a template - - cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) - self.cube = iris.load_cube(cube_file_path) - - # Make the source cube curvilinear - x_coord = self.cube.coord("longitude") - y_coord = self.cube.coord("latitude") - xx, yy = np.meshgrid(x_coord.points, y_coord.points) - self.cube.remove_coord(x_coord) - self.cube.remove_coord(y_coord) - x_coord_2d = AuxCoord( - xx, - standard_name=x_coord.standard_name, - units=x_coord.units, - coord_system=x_coord.coord_system, - ) - y_coord_2d = AuxCoord( - yy, - standard_name=y_coord.standard_name, - units=y_coord.units, - coord_system=y_coord.coord_system, - ) - self.cube.add_aux_coord(x_coord_2d, (1, 2)) - self.cube.add_aux_coord(y_coord_2d, (1, 2)) - - template_file_path = tests.get_data_path( - ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] - ) - self.template_cube = iris.load_cube(template_file_path) - - # Prepare a regridding scheme - self.scheme_pic = PointInCell() - - def time_regrid_pic(self) -> None: - # Regrid the cube onto the template. - out = self.cube.regrid(self.template_cube, self.scheme_pic) - # Realise the data - out.data - - def tracemalloc_regrid_pic(self) -> None: - # Regrid the cube onto the template. - out = self.cube.regrid(self.template_cube, self.scheme_pic) - # Realise the data - out.data - - tracemalloc_regrid_pic.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/save.py b/benchmarks/benchmarks_iris/save.py deleted file mode 100644 index 4bac1b14505..00000000000 --- a/benchmarks/benchmarks_iris/save.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""File saving benchmarks.""" - -from iris import save -from iris.mesh import save_mesh - -from .generate_data.ugrid import make_cube_like_2d_cubesphere - - -class NetcdfSave: - params = [[50, 600], [False, True]] - param_names = ["cubesphere-N", "is_unstructured"] - - def setup(self, n_cubesphere, is_unstructured): - self.cube = make_cube_like_2d_cubesphere( - n_cube=n_cubesphere, with_mesh=is_unstructured - ) - - def _save_data(self, cube, do_copy=True): - if do_copy: - # Copy the cube, to avoid distorting the results by changing it - # Because we known that older Iris code realises lazy coords - cube = cube.copy() - save(cube, "tmp.nc") - - def _save_mesh(self, cube): - # In this case, we are happy that the mesh is *not* modified - save_mesh(cube.mesh, "mesh.nc") - - def time_netcdf_save_cube(self, n_cubesphere, is_unstructured): - self._save_data(self.cube) - - def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured): - if is_unstructured: - self._save_mesh(self.cube) - - def tracemalloc_netcdf_save(self, n_cubesphere, is_unstructured): - # Don't need to copy the cube here since track_ benchmarks don't - # do repeats between self.setup() calls. - self._save_data(self.cube, do_copy=False) diff --git a/benchmarks/benchmarks_iris/sperf/__init__.py b/benchmarks/benchmarks_iris/sperf/__init__.py deleted file mode 100644 index 2b8b508fd57..00000000000 --- a/benchmarks/benchmarks_iris/sperf/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project. - -SPerf = assessing performance against a series of increasingly large LFRic -datasets. -""" - -from iris import load_cube - -from ..generate_data.ugrid import make_cubesphere_testfile - - -class FileMixin: - """For use in any benchmark classes that work on a file.""" - - # Allows time for large file generation. - timeout = 3600.0 - # Largest file with these params: ~90GB. - # Total disk space: ~410GB. - params = [ - [12, 384, 640, 960, 1280, 1668], - [1, 36, 72], - [1, 3, 10], - ] - param_names = ["cubesphere_C", "N levels", "N time steps"] - # cubesphere_C: notation refers to faces per panel. - # e.g. C1 is 6 faces, 8 nodes - - def setup(self, c_size, n_levels, n_times): - self.file_path = make_cubesphere_testfile( - c_size=c_size, n_levels=n_levels, n_times=n_times - ) - - def load_cube(self): - return load_cube(str(self.file_path)) diff --git a/benchmarks/benchmarks_iris/sperf/combine_regions.py b/benchmarks/benchmarks_iris/sperf/combine_regions.py deleted file mode 100644 index 591b7bb9bea..00000000000 --- a/benchmarks/benchmarks_iris/sperf/combine_regions.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Region combine benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" - -import os.path - -from dask import array as da -import numpy as np - -from iris import load, load_cube, save -from iris.mesh.utils import recombine_submeshes - -from .. import on_demand_benchmark -from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere - - -class Mixin: - # Characterise time taken + memory-allocated, for various stages of combine - # operations on cubesphere-like test data. 
- timeout = 300.0 - params = [100, 200, 300, 500, 1000, 1668] - param_names = ["cubesphere_C"] - # Fix result units for the tracking benchmarks. - unit = "Mb" - temp_save_path = BENCHMARK_DATA / "tmp.nc" - - def _parametrised_cache_filename(self, n_cubesphere, content_name): - return BENCHMARK_DATA / f"cube_C{n_cubesphere}_{content_name}.nc" - - def _make_region_cubes(self, full_mesh_cube): - """Make a fixed number of region cubes from a full meshcube.""" - # Divide the cube into regions. - n_faces = full_mesh_cube.shape[-1] - # Start with a simple list of face indices - # first extend to multiple of 5 - n_faces_5s = 5 * ((n_faces + 1) // 5) - i_faces = np.arange(n_faces_5s, dtype=int) - # reshape (5N,) to (N, 5) - i_faces = i_faces.reshape((n_faces_5s // 5, 5)) - # reorder [2, 3, 4, 0, 1] within each block of 5 - i_faces = np.concatenate([i_faces[:, 2:], i_faces[:, :2]], axis=1) - # flatten to get [2 3 4 0 1 (-) 8 9 10 6 7 (-) 13 14 15 11 12 ...] - i_faces = i_faces.flatten() - # reduce back to original length, wrap any overflows into valid range - i_faces = i_faces[:n_faces] % n_faces - - # Divide into regions -- always slightly uneven, since 7 doesn't divide - n_regions = 7 - n_facesperregion = n_faces // n_regions - i_face_regions = (i_faces // n_facesperregion) % n_regions - region_inds = [ - np.where(i_face_regions == i_region)[0] for i_region in range(n_regions) - ] - # NOTE: this produces 7 regions, with near-adjacent value ranges but - # with some points "moved" to an adjacent region. - # Also, region-0 is bigger (because of not dividing by 7). - - # Finally, make region cubes with these indices. - region_cubes = [full_mesh_cube[..., inds] for inds in region_inds] - return region_cubes - - def setup_cache(self): - """Cache all the necessary source data on disk.""" - # Control dask, to minimise memory usage + allow largest data. - self.fix_dask_settings() - - for n_cubesphere in self.params: - # Do for each parameter, since "setup_cache" is NOT parametrised - mesh_cube = make_cube_like_2d_cubesphere( - n_cube=n_cubesphere, with_mesh=True - ) - # Save to files which include the parameter in the names. - save( - mesh_cube, - self._parametrised_cache_filename(n_cubesphere, "meshcube"), - ) - region_cubes = self._make_region_cubes(mesh_cube) - save( - region_cubes, - self._parametrised_cache_filename(n_cubesphere, "regioncubes"), - ) - - def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=True): - """Combine tests "standard" setup operation. - - Load the source cubes (full-mesh + region) from disk. - These are specific to the cubesize parameter. - The data is cached on disk rather than calculated, to avoid any - pre-loading of the process memory allocation. - - If 'imaginary_data' is set (default), the region cubes data is replaced - with lazy data in the form of a da.zeros(). Otherwise, the region data - is lazy data from the files. - - If 'create_result_cube' is set, create "self.combined_cube" containing - the (still lazy) result. - - NOTE: various test classes override + extend this. - - """ - # Load source cubes (full-mesh and regions) - self.full_mesh_cube = load_cube( - self._parametrised_cache_filename(n_cubesphere, "meshcube") - ) - self.region_cubes = load( - self._parametrised_cache_filename(n_cubesphere, "regioncubes") - ) - - # Remove all var-names from loaded cubes, which can otherwise cause - # problems. Also implement 'imaginary' data. 
- for cube in self.region_cubes + [self.full_mesh_cube]: - cube.var_name = None - for coord in cube.coords(): - coord.var_name = None - if imaginary_data: - # Replace cube data (lazy file data) with 'imaginary' data. - # This has the same lazy-array attributes, but is allocated by - # creating chunks on demand instead of loading from file. - data = cube.lazy_data() - data = da.zeros(data.shape, dtype=data.dtype, chunks=data.chunksize) - cube.data = data - - if create_result_cube: - self.recombined_cube = self.recombine() - - # Fix dask usage mode for all the subsequent performance tests. - self.fix_dask_settings() - - def teardown(self, _): - self.temp_save_path.unlink(missing_ok=True) - - def fix_dask_settings(self): - """Fix "standard" dask behaviour for time+space testing. - - Currently this is single-threaded mode, with known chunksize, - which is optimised for space saving so we can test largest data. - - """ - import dask.config as dcfg - - # Use single-threaded, to avoid process-switching costs and minimise memory usage. - # N.B. generally may be slower, but use less memory ? - dcfg.set(scheduler="single-threaded") - # Configure iris._lazy_data.as_lazy_data to aim for 100Mb chunks - dcfg.set({"array.chunk-size": "128Mib"}) - - def recombine(self): - # A handy general shorthand for the main "combine" operation. - result = recombine_submeshes( - self.full_mesh_cube, - self.region_cubes, - index_coord_name="i_mesh_face", - ) - return result - - def save_recombined_cube(self): - save(self.recombined_cube, self.temp_save_path) - - -@on_demand_benchmark -class CreateCube(Mixin): - """Time+memory costs of creating a combined-regions cube. - - The result is lazy, and we don't do the actual calculation. - - """ - - def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=False): - # In this case only, do *not* create the result cube. - # That is the operation we want to test. - super().setup(n_cubesphere, imaginary_data, create_result_cube) - - def time_create_combined_cube(self, n_cubesphere): - self.recombine() - - def tracemalloc_create_combined_cube(self, n_cubesphere): - self.recombine() - - -@on_demand_benchmark -class ComputeRealData(Mixin): - """Time+memory costs of computing combined-regions data.""" - - def time_compute_data(self, n_cubesphere): - _ = self.recombined_cube.data - - def tracemalloc_compute_data(self, n_cubesphere): - _ = self.recombined_cube.data - - -@on_demand_benchmark -class SaveData(Mixin): - """Test saving *only*. - - Test saving *only*, having replaced the input cube data with 'imaginary' - array data, so that input data is not loaded from disk during the save - operation. - - """ - - def time_save(self, n_cubesphere): - # Save to disk, which must compute data + stream it to file. - self.save_recombined_cube() - - def tracemalloc_save(self, n_cubesphere): - self.save_recombined_cube() - - def track_filesize_saved(self, n_cubesphere): - self.save_recombined_cube() - return self.temp_save_path.stat().st_size * 1.0e-6 - - -@on_demand_benchmark -class FileStreamedCalc(Mixin): - """Test the whole cost of file-to-file streaming. - - Uses the combined cube which is based on lazy data loading from the region - cubes on disk. - - """ - - def setup(self, n_cubesphere, imaginary_data=False, create_result_cube=True): - # In this case only, do *not* replace the loaded regions data with - # 'imaginary' data, as we want to test file-to-file calculation+save. 
- super().setup(n_cubesphere, imaginary_data, create_result_cube) - - def time_stream_file2file(self, n_cubesphere): - # Save to disk, which must compute data + stream it to file. - self.save_recombined_cube() - - def tracemalloc_stream_file2file(self, n_cubesphere): - self.save_recombined_cube() diff --git a/benchmarks/benchmarks_iris/sperf/equality.py b/benchmarks/benchmarks_iris/sperf/equality.py deleted file mode 100644 index ddee90cd283..00000000000 --- a/benchmarks/benchmarks_iris/sperf/equality.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Equality benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" - -from .. import on_demand_benchmark -from . import FileMixin - - -@on_demand_benchmark -class CubeEquality(FileMixin): - r"""Benchmark time and memory costs. - - Benchmark time and memory costs of comparing :class:`~iris.cube.Cube`\\ s - with attached :class:`~iris.mesh.MeshXY`\\ es. - - Uses :class:`FileMixin` as the realistic case will be comparing - :class:`~iris.cube.Cube`\\ s that have been loaded from file. - - """ - - # Cut down paremt parameters. - params = [FileMixin.params[0]] - - def setup(self, c_size, n_levels=1, n_times=1): - super().setup(c_size, n_levels, n_times) - self.cube = self.load_cube() - self.other_cube = self.load_cube() - - def peakmem_eq(self, n_cube): - _ = self.cube == self.other_cube - - def time_eq(self, n_cube): - _ = self.cube == self.other_cube diff --git a/benchmarks/benchmarks_iris/sperf/load.py b/benchmarks/benchmarks_iris/sperf/load.py deleted file mode 100644 index d304a30c827..00000000000 --- a/benchmarks/benchmarks_iris/sperf/load.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""File loading benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" - -from .. import on_demand_benchmark -from . import FileMixin - - -@on_demand_benchmark -class Load(FileMixin): - def time_load_cube(self, _, __, ___): - _ = self.load_cube() - - -@on_demand_benchmark -class Realise(FileMixin): - def setup(self, c_size, n_levels, n_times): - super().setup(c_size, n_levels, n_times) - self.loaded_cube = self.load_cube() - - def time_realise_cube(self, _, __, ___): - # Don't touch loaded_cube.data - permanent realisation plays badly with - # ASV's re-run strategy. - assert self.loaded_cube.has_lazy_data() - self.loaded_cube.core_data().compute() diff --git a/benchmarks/benchmarks_iris/sperf/save.py b/benchmarks/benchmarks_iris/sperf/save.py deleted file mode 100644 index a715ec24240..00000000000 --- a/benchmarks/benchmarks_iris/sperf/save.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""File saving benchmarks for the SPerf scheme of the UK Met Office's NG-VAT project.""" - -import os.path - -from iris import save -from iris.mesh import save_mesh - -from .. 
import on_demand_benchmark -from ..generate_data.ugrid import make_cube_like_2d_cubesphere - - -@on_demand_benchmark -class NetcdfSave: - """Benchmark time and memory costs of saving ~large-ish data cubes to netcdf.""" - - params = [[1, 100, 200, 300, 500, 1000, 1668], [False, True]] - param_names = ["cubesphere_C", "is_unstructured"] - # Fix result units for the tracking benchmarks. - unit = "Mb" - - def setup(self, n_cubesphere, is_unstructured): - self.cube = make_cube_like_2d_cubesphere( - n_cube=n_cubesphere, with_mesh=is_unstructured - ) - - def _save_cube(self, cube): - save(cube, "tmp.nc") - - def _save_mesh(self, cube): - save_mesh(cube.mesh, "mesh.nc") - - def time_save_cube(self, n_cubesphere, is_unstructured): - self._save_cube(self.cube) - - def tracemalloc_save_cube(self, n_cubesphere, is_unstructured): - self._save_cube(self.cube) - - def time_save_mesh(self, n_cubesphere, is_unstructured): - if is_unstructured: - self._save_mesh(self.cube) - - # The filesizes make a good reference point for the 'addedmem' memory - # usage results. - def track_filesize_save_cube(self, n_cubesphere, is_unstructured): - self._save_cube(self.cube) - return os.path.getsize("tmp.nc") * 1.0e-6 diff --git a/benchmarks/benchmarks_iris/stats.py b/benchmarks/benchmarks_iris/stats.py deleted file mode 100644 index fbab12cd4b2..00000000000 --- a/benchmarks/benchmarks_iris/stats.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Stats benchmark tests.""" - -import iris -from iris.analysis.stats import pearsonr -import iris.tests - - -class PearsonR: - def setup(self): - cube_temp = iris.load_cube( - iris.tests.get_data_path( - ("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc") - ) - ) - - # Make data non-lazy. - cube_temp.data - - self.cube_a = cube_temp[:6] - self.cube_b = cube_temp[20:26] - self.cube_b.replace_coord(self.cube_a.coord("time")) - for name in ["latitude", "longitude"]: - self.cube_b.coord(name).guess_bounds() - self.weights = iris.analysis.cartography.area_weights(self.cube_b) - - def time_real(self): - pearsonr(self.cube_a, self.cube_b, weights=self.weights) - - def tracemalloc_real(self): - pearsonr(self.cube_a, self.cube_b, weights=self.weights) - - tracemalloc_real.number = 3 # type: ignore[attr-defined] - - def time_lazy(self): - for cube in self.cube_a, self.cube_b: - cube.data = cube.lazy_data() - - result = pearsonr(self.cube_a, self.cube_b, weights=self.weights) - result.data - - def tracemalloc_lazy(self): - for cube in self.cube_a, self.cube_b: - cube.data = cube.lazy_data() - - result = pearsonr(self.cube_a, self.cube_b, weights=self.weights) - result.data - - tracemalloc_lazy.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/trajectory.py b/benchmarks/benchmarks_iris/trajectory.py deleted file mode 100644 index 77825ef2f2b..00000000000 --- a/benchmarks/benchmarks_iris/trajectory.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""Trajectory benchmark test.""" - -# import iris tests first so that some things can be initialised before -# importing anything else -from iris import tests # isort:skip - -import numpy as np - -import iris -from iris.analysis.trajectory import interpolate - - -class TrajectoryInterpolation: - def setup(self) -> None: - # Prepare a cube and a template - - cube_file_path = tests.get_data_path(["NetCDF", "regrid", "regrid_xyt.nc"]) - self.cube = iris.load_cube(cube_file_path) - - trajectory = np.array([np.array((-50 + i, -50 + i)) for i in range(100)]) - self.sample_points = [ - ("longitude", trajectory[:, 0]), - ("latitude", trajectory[:, 1]), - ] - - def time_trajectory_linear(self) -> None: - # Regrid the cube onto the template. - out_cube = interpolate(self.cube, self.sample_points, method="linear") - # Realise the data - out_cube.data - - def tracemalloc_trajectory_linear(self) -> None: - # Regrid the cube onto the template. - out_cube = interpolate(self.cube, self.sample_points, method="linear") - # Realise the data - out_cube.data - - tracemalloc_trajectory_linear.number = 3 # type: ignore[attr-defined] - - def time_trajectory_nearest(self) -> None: - # Regrid the cube onto the template. - out_cube = interpolate(self.cube, self.sample_points, method="nearest") - # Realise the data - out_cube.data - - def tracemalloc_trajectory_nearest(self) -> None: - # Regrid the cube onto the template. - out_cube = interpolate(self.cube, self.sample_points, method="nearest") - # Realise the data - out_cube.data - - tracemalloc_trajectory_nearest.number = 3 # type: ignore[attr-defined] diff --git a/benchmarks/benchmarks_iris/unit_style/__init__disabled.py b/benchmarks/benchmarks_iris/unit_style/__init__disabled.py deleted file mode 100644 index d7f84c2b919..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/__init__disabled.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Small-scope benchmarks that can help with performance investigations. - -By renaming ``__init__.py`` these are all disabled by default: - -- They bloat benchmark run-time. -- They are too vulnerable to 'noise' due to their small scope - small objects, - short operations - they report a lot of false positive regressions. -- We rely on the wider-scope integration-style benchmarks to flag performance - changes, upon which we expect to do some manual investigation - these - smaller benchmarks can be run then. - -""" diff --git a/benchmarks/benchmarks_iris/unit_style/aux_factory.py b/benchmarks/benchmarks_iris/unit_style/aux_factory.py deleted file mode 100644 index 329a2b0bdaf..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/aux_factory.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Small-scope AuxFactory benchmark tests.""" - -import numpy as np - -from iris import aux_factory, coords - - -class FactoryCommon: - # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: - # * make class an ABC - # * remove NotImplementedError - # * combine setup_common into setup - """Run a generalised suite of benchmarks for any factory. - - A base class running a generalised suite of benchmarks for any factory. - Factory to be specified in a subclass. 
- - ASV will run the benchmarks within this class for any subclasses. - - Should only be instantiated within subclasses, but cannot enforce this - since ASV cannot handle classes that include abstract methods. - """ - - def setup(self): - """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" - raise NotImplementedError - - def setup_common(self): - """Shared setup code that can be called by subclasses.""" - self.factory = self.create() - - def time_create(self): - """Create an instance of the benchmarked factory. - - Create method is specified in the subclass. - """ - self.create() - - -class HybridHeightFactory(FactoryCommon): - def setup(self): - data_1d = np.zeros(1000) - self.coord = coords.AuxCoord(points=data_1d, units="m") - - self.setup_common() - - def create(self): - return aux_factory.HybridHeightFactory(delta=self.coord) diff --git a/benchmarks/benchmarks_iris/unit_style/coords.py b/benchmarks/benchmarks_iris/unit_style/coords.py deleted file mode 100644 index 704746f190a..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/coords.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Small-scope Coord benchmark tests.""" - -import numpy as np - -from iris import coords - -from .. import disable_repeat_between_setup - - -def setup(): - """General variables needed by multiple benchmark classes.""" - global data_1d - - data_1d = np.zeros(1000) - - -class CoordCommon: - # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: - # * make class an ABC - # * remove NotImplementedError - # * combine setup_common into setup - """Run a generalised suite of benchmarks for any coord. - - A base class running a generalised suite of benchmarks for any coord. - Coord to be specified in a subclass. - - ASV will run the benchmarks within this class for any subclasses. - - Should only be instantiated within subclasses, but cannot enforce this - since ASV cannot handle classes that include abstract methods. - """ - - def setup(self): - """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" - raise NotImplementedError - - def setup_common(self): - """Shared setup code that can be called by subclasses.""" - self.component = self.create() - - def time_create(self): - """Create an instance of the benchmarked factory. - - Create method is specified in the subclass. 
- """ - self.create() - - -class DimCoord(CoordCommon): - def setup(self): - point_values = np.arange(1000) - bounds = np.array([point_values - 1, point_values + 1]).transpose() - - self.create_kwargs = { - "points": point_values, - "bounds": bounds, - "units": "days since 1970-01-01", - "climatological": True, - } - - self.setup_common() - - def create(self): - return coords.DimCoord(**self.create_kwargs) - - def time_regular(self): - coords.DimCoord.from_regular(0, 1, 1000) - - -class AuxCoord(CoordCommon): - def setup(self): - bounds = np.array([data_1d - 1, data_1d + 1]).transpose() - - self.create_kwargs = { - "points": data_1d, - "bounds": bounds, - "units": "days since 1970-01-01", - "climatological": True, - } - - self.setup_common() - - def create(self): - return coords.AuxCoord(**self.create_kwargs) - - def time_points(self): - _ = self.component.points - - def time_bounds(self): - _ = self.component.bounds - - -@disable_repeat_between_setup -class AuxCoordLazy(AuxCoord): - """Lazy equivalent of :class:`AuxCoord`.""" - - def setup(self): - super().setup() - self.create_kwargs["points"] = self.component.lazy_points() - self.create_kwargs["bounds"] = self.component.lazy_bounds() - self.setup_common() - - -class CellMeasure(CoordCommon): - def setup(self): - self.setup_common() - - def create(self): - return coords.CellMeasure(data_1d) - - -class CellMethod(CoordCommon): - def setup(self): - self.setup_common() - - def create(self): - return coords.CellMethod("test") - - -class AncillaryVariable(CoordCommon): - def setup(self): - self.setup_common() - - def create(self): - return coords.AncillaryVariable(data_1d) diff --git a/benchmarks/benchmarks_iris/unit_style/cube.py b/benchmarks/benchmarks_iris/unit_style/cube.py deleted file mode 100644 index 780418aa148..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/cube.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Small-scope Cube benchmark tests.""" - -import numpy as np - -from iris import analysis, aux_factory, coords, cube - -from .. import disable_repeat_between_setup -from ..generate_data.stock import sample_meshcoord - - -def setup(*params): - """General variables needed by multiple benchmark classes.""" - global data_1d - global data_2d - global general_cube - - data_2d = np.zeros((1000,) * 2) - data_1d = data_2d[0] - general_cube = cube.Cube(data_2d) - - -class ComponentCommon: - # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released: - # * make class an ABC - # * remove NotImplementedError - # * combine setup_common into setup - """Run a generalised suite of benchmarks for cubes. - - A base class running a generalised suite of benchmarks for cubes that - include a specified component (e.g. Coord, CellMeasure etc.). Component to - be specified in a subclass. - - ASV will run the benchmarks within this class for any subclasses. - - Should only be instantiated within subclasses, but cannot enforce this - since ASV cannot handle classes that include abstract methods. - """ - - def setup(self): - """Prevent ASV instantiating (must therefore override setup() in any subclasses.).""" - raise NotImplementedError - - def create(self): - """Create a cube (generic). - - cube_kwargs allow dynamic inclusion of different components; - specified in subclasses. 
- """ - return cube.Cube(data=data_2d, **self.cube_kwargs) - - def setup_common(self): - """Shared setup code that can be called by subclasses.""" - self.cube = self.create() - - def time_create(self): - """Create a cube that includes an instance of the benchmarked component.""" - self.create() - - def time_add(self): - """Add an instance of the benchmarked component to an existing cube.""" - # Unable to create the copy during setup since this needs to be re-done - # for every repeat of the test (some components disallow duplicates). - general_cube_copy = general_cube.copy(data=data_2d) - self.add_method(general_cube_copy, *self.add_args) - - -class Cube: - def time_basic(self): - cube.Cube(data_2d) - - def time_rename(self): - general_cube.name = "air_temperature" - - -class AuxCoord(ComponentCommon): - def setup(self): - self.coord_name = "test" - coord_bounds = np.array([data_1d - 1, data_1d + 1]).transpose() - aux_coord = coords.AuxCoord( - long_name=self.coord_name, - points=data_1d, - bounds=coord_bounds, - units="days since 1970-01-01", - climatological=True, - ) - - # Variables needed by the ComponentCommon base class. - self.cube_kwargs = {"aux_coords_and_dims": [(aux_coord, 0)]} - self.add_method = cube.Cube.add_aux_coord - self.add_args = (aux_coord, (0)) - - self.setup_common() - - def time_return_coords(self): - self.cube.coords() - - def time_return_coord_dims(self): - self.cube.coord_dims(self.coord_name) - - -class AuxFactory(ComponentCommon): - def setup(self): - coord = coords.AuxCoord(points=data_1d, units="m") - self.hybrid_factory = aux_factory.HybridHeightFactory(delta=coord) - - # Variables needed by the ComponentCommon base class. - self.cube_kwargs = { - "aux_coords_and_dims": [(coord, 0)], - "aux_factories": [self.hybrid_factory], - } - - self.setup_common() - - # Variables needed by the overridden time_add benchmark in this subclass. - cube_w_coord = self.cube.copy() - [cube_w_coord.remove_aux_factory(i) for i in cube_w_coord.aux_factories] - self.cube_w_coord = cube_w_coord - - def time_add(self): - # Requires override from super().time_add because the cube needs an - # additional coord. - self.cube_w_coord.add_aux_factory(self.hybrid_factory) - - -class CellMeasure(ComponentCommon): - def setup(self): - cell_measure = coords.CellMeasure(data_1d) - - # Variables needed by the ComponentCommon base class. - self.cube_kwargs = {"cell_measures_and_dims": [(cell_measure, 0)]} - self.add_method = cube.Cube.add_cell_measure - self.add_args = (cell_measure, 0) - - self.setup_common() - - -class CellMethod(ComponentCommon): - def setup(self): - cell_method = coords.CellMethod("test") - - # Variables needed by the ComponentCommon base class. - self.cube_kwargs = {"cell_methods": [cell_method]} - self.add_method = cube.Cube.add_cell_method - self.add_args = [cell_method] - - self.setup_common() - - -class AncillaryVariable(ComponentCommon): - def setup(self): - ancillary_variable = coords.AncillaryVariable(data_1d) - - # Variables needed by the ComponentCommon base class. 
- self.cube_kwargs = {"ancillary_variables_and_dims": [(ancillary_variable, 0)]} - self.add_method = cube.Cube.add_ancillary_variable - self.add_args = (ancillary_variable, 0) - - self.setup_common() - - -class MeshCoord: - params = [ - 6, # minimal cube-sphere - int(1e6), # realistic cube-sphere size - 1000, # To match size in :class:`AuxCoord` - ] - param_names = ["number of faces"] - - def setup(self, n_faces): - mesh_kwargs = dict(n_nodes=n_faces + 2, n_edges=n_faces * 2, n_faces=n_faces) - - self.mesh_coord = sample_meshcoord(sample_mesh_kwargs=mesh_kwargs) - self.data = np.zeros(n_faces) - self.cube_blank = cube.Cube(data=self.data) - self.cube = self.create() - - def create(self): - return cube.Cube(data=self.data, aux_coords_and_dims=[(self.mesh_coord, 0)]) - - def time_create(self, n_faces): - _ = self.create() - - @disable_repeat_between_setup - def time_add(self, n_faces): - self.cube_blank.add_aux_coord(self.mesh_coord, 0) - - @disable_repeat_between_setup - def time_remove(self, n_faces): - self.cube.remove_coord(self.mesh_coord) - - -class Merge: - def setup(self): - self.cube_list = cube.CubeList() - for i in np.arange(2): - i_cube = general_cube.copy() - i_coord = coords.AuxCoord([i]) - i_cube.add_aux_coord(i_coord) - self.cube_list.append(i_cube) - - def time_merge(self): - self.cube_list.merge() - - -class Concatenate: - def setup(self): - dim_size = 1000 - self.cube_list = cube.CubeList() - for i in np.arange(dim_size * 2, step=dim_size): - i_cube = general_cube.copy() - i_coord = coords.DimCoord(np.arange(dim_size) + (i * dim_size)) - i_cube.add_dim_coord(i_coord, 0) - self.cube_list.append(i_cube) - - def time_concatenate(self): - self.cube_list.concatenate() - - -class Equality: - def setup(self): - self.cube_a = general_cube.copy() - self.cube_b = general_cube.copy() - - aux_coord = coords.AuxCoord(data_1d) - self.cube_a.add_aux_coord(aux_coord, 0) - self.cube_b.add_aux_coord(aux_coord, 1) - - def time_equality(self): - self.cube_a == self.cube_b - - -class Aggregation: - def setup(self): - repeat_number = 10 - repeat_range = range(int(1000 / repeat_number)) - array_repeat = np.repeat(repeat_range, repeat_number) - array_unique = np.arange(len(array_repeat)) - - coord_repeat = coords.AuxCoord(points=array_repeat, long_name="repeat") - coord_unique = coords.DimCoord(points=array_unique, long_name="unique") - - local_cube = general_cube.copy() - local_cube.add_aux_coord(coord_repeat, 0) - local_cube.add_dim_coord(coord_unique, 0) - self.cube = local_cube - - def time_aggregated_by(self): - self.cube.aggregated_by("repeat", analysis.MEAN) diff --git a/benchmarks/benchmarks_iris/unit_style/mesh.py b/benchmarks/benchmarks_iris/unit_style/mesh.py deleted file mode 100644 index ed3aad14285..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/mesh.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Benchmark tests for the iris.mesh module.""" - -from copy import deepcopy - -import numpy as np - -from iris import mesh - -from .. import disable_repeat_between_setup -from ..generate_data.stock import sample_mesh - - -class UGridCommon: - """Run a generalised suite of benchmarks for any mesh object. - - A base class running a generalised suite of benchmarks for any mesh object. - Object to be specified in a subclass. - - ASV will run the benchmarks within this class for any subclasses. 
- - ASV will not benchmark this class as setup() triggers a NotImplementedError. - (ASV has not yet released ABC/abstractmethod support - asv#838). - - """ - - params = [ - 6, # minimal cube-sphere - int(1e6), # realistic cube-sphere size - ] - param_names = ["number of faces"] - - def setup(self, *params): - self.object = self.create() - - def create(self): - raise NotImplementedError - - def time_create(self, *params): - """Create an instance of the benchmarked object. - - create() method is specified in the subclass. - """ - self.create() - - -class Connectivity(UGridCommon): - def setup(self, n_faces): - self.array = np.zeros([n_faces, 3], dtype=int) - super().setup(n_faces) - - def create(self): - return mesh.Connectivity(indices=self.array, cf_role="face_node_connectivity") - - def time_indices(self, n_faces): - _ = self.object.indices - - def time_location_lengths(self, n_faces): - # Proofed against the Connectivity name change (633ed17). - if getattr(self.object, "src_lengths", False): - meth = self.object.src_lengths - else: - meth = self.object.location_lengths - _ = meth() - - def time_validate_indices(self, n_faces): - self.object.validate_indices() - - -@disable_repeat_between_setup -class ConnectivityLazy(Connectivity): - """Lazy equivalent of :class:`Connectivity`.""" - - def setup(self, n_faces): - super().setup(n_faces) - self.array = self.object.lazy_indices() - self.object = self.create() - - -class MeshXY(UGridCommon): - def setup(self, n_faces, lazy=False): - #### - # Steal everything from the sample mesh for benchmarking creation of a - # brand new mesh. - source_mesh = sample_mesh( - n_nodes=n_faces + 2, - n_edges=n_faces * 2, - n_faces=n_faces, - lazy_values=lazy, - ) - - def get_coords_and_axes(location): - return [ - (source_mesh.coord(axis=axis, location=location), axis) - for axis in ("x", "y") - ] - - self.mesh_kwargs = dict( - topology_dimension=source_mesh.topology_dimension, - node_coords_and_axes=get_coords_and_axes("node"), - connectivities=source_mesh.connectivities(), - edge_coords_and_axes=get_coords_and_axes("edge"), - face_coords_and_axes=get_coords_and_axes("face"), - ) - #### - - super().setup(n_faces) - - self.face_node = self.object.face_node_connectivity - self.node_x = self.object.node_coords.node_x - # Kwargs for reuse in search and remove methods. - self.connectivities_kwarg = dict(cf_role="edge_node_connectivity") - self.coords_kwarg = dict(location="face") - - # TODO: an opportunity for speeding up runtime if needed, since - # eq_object is not needed for all benchmarks. Just don't generate it - # within a benchmark - the execution time is large enough that it - # could be a significant portion of the benchmark - makes regressions - # smaller and could even pick up regressions in copying instead! 
- self.eq_object = deepcopy(self.object) - - def create(self): - return mesh.MeshXY(**self.mesh_kwargs) - - def time_add_connectivities(self, n_faces): - self.object.add_connectivities(self.face_node) - - def time_add_coords(self, n_faces): - self.object.add_coords(node_x=self.node_x) - - def time_connectivities(self, n_faces): - _ = self.object.connectivities(**self.connectivities_kwarg) - - def time_coords(self, n_faces): - _ = self.object.coords(**self.coords_kwarg) - - def time_eq(self, n_faces): - _ = self.object == self.eq_object - - def time_remove_connectivities(self, n_faces): - self.object.remove_connectivities(**self.connectivities_kwarg) - - def time_remove_coords(self, n_faces): - self.object.remove_coords(**self.coords_kwarg) - - -@disable_repeat_between_setup -class MeshXYLazy(MeshXY): - """Lazy equivalent of :class:`MeshXY`.""" - - def setup(self, n_faces, lazy=True): - super().setup(n_faces, lazy=lazy) - - -class MeshCoord(UGridCommon): - # Add extra parameter value to match AuxCoord benchmarking. - params = UGridCommon.params + [1000] - - def setup(self, n_faces, lazy=False): - self.mesh = sample_mesh( - n_nodes=n_faces + 2, - n_edges=n_faces * 2, - n_faces=n_faces, - lazy_values=lazy, - ) - - super().setup(n_faces) - - def create(self): - return mesh.MeshCoord(mesh=self.mesh, location="face", axis="x") - - def time_points(self, n_faces): - _ = self.object.points - - def time_bounds(self, n_faces): - _ = self.object.bounds - - -@disable_repeat_between_setup -class MeshCoordLazy(MeshCoord): - """Lazy equivalent of :class:`MeshCoord`.""" - - def setup(self, n_faces, lazy=True): - super().setup(n_faces, lazy=lazy) diff --git a/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py b/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py deleted file mode 100644 index 0af055fa820..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/metadata_manager_factory.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""Small-scope metadata manager factory benchmark tests.""" - -from iris.common import ( - AncillaryVariableMetadata, - BaseMetadata, - CellMeasureMetadata, - CoordMetadata, - CubeMetadata, - DimCoordMetadata, - metadata_manager_factory, -) - - -class MetadataManagerFactory__create: - params = [1, 10, 100] - - def time_AncillaryVariableMetadata(self, n): - [metadata_manager_factory(AncillaryVariableMetadata) for _ in range(n)] - - def time_BaseMetadata(self, n): - [metadata_manager_factory(BaseMetadata) for _ in range(n)] - - def time_CellMeasureMetadata(self, n): - [metadata_manager_factory(CellMeasureMetadata) for _ in range(n)] - - def time_CoordMetadata(self, n): - [metadata_manager_factory(CoordMetadata) for _ in range(n)] - - def time_CubeMetadata(self, n): - [metadata_manager_factory(CubeMetadata) for _ in range(n)] - - def time_DimCoordMetadata(self, n): - [metadata_manager_factory(DimCoordMetadata) for _ in range(n)] - - -class MetadataManagerFactory: - def setup(self): - self.ancillary = metadata_manager_factory(AncillaryVariableMetadata) - self.base = metadata_manager_factory(BaseMetadata) - self.cell = metadata_manager_factory(CellMeasureMetadata) - self.coord = metadata_manager_factory(CoordMetadata) - self.cube = metadata_manager_factory(CubeMetadata) - self.dim = metadata_manager_factory(DimCoordMetadata) - - def time_AncillaryVariableMetadata_fields(self): - self.ancillary.fields - - def time_AncillaryVariableMetadata_values(self): - self.ancillary.values - - def time_BaseMetadata_fields(self): - self.base.fields - - def time_BaseMetadata_values(self): - self.base.values - - def time_CellMeasuresMetadata_fields(self): - self.cell.fields - - def time_CellMeasuresMetadata_values(self): - self.cell.values - - def time_CoordMetadata_fields(self): - self.coord.fields - - def time_CoordMetadata_values(self): - self.coord.values - - def time_CubeMetadata_fields(self): - self.cube.fields - - def time_CubeMetadata_values(self): - self.cube.values - - def time_DimCoordMetadata_fields(self): - self.dim.fields - - def time_DimCoordMetadata_values(self): - self.dim.values diff --git a/benchmarks/benchmarks_iris/unit_style/mixin.py b/benchmarks/benchmarks_iris/unit_style/mixin.py deleted file mode 100644 index 92de5e7ad95..00000000000 --- a/benchmarks/benchmarks_iris/unit_style/mixin.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Small-scope CFVariableMixin benchmark tests.""" - -import numpy as np - -from iris import coords -from iris.common.metadata import AncillaryVariableMetadata - -LONG_NAME = "air temperature" -STANDARD_NAME = "air_temperature" -VAR_NAME = "air_temp" -UNITS = "degrees" -ATTRIBUTES = dict(a=1) -DICT = dict( - standard_name=STANDARD_NAME, - long_name=LONG_NAME, - var_name=VAR_NAME, - units=UNITS, - attributes=ATTRIBUTES, -) -METADATA = AncillaryVariableMetadata(**DICT) -TUPLE = tuple(DICT.values()) - - -class CFVariableMixin: - def setup(self): - data_1d = np.zeros(1000) - - # These benchmarks are from a user perspective, so using a user-level - # subclass of CFVariableMixin to test behaviour. AncillaryVariable is - # the simplest so using that. 
- self.cfm_proxy = coords.AncillaryVariable(data_1d) - self.cfm_proxy.long_name = "test" - - def time_get_long_name(self): - self.cfm_proxy.long_name - - def time_set_long_name(self): - self.cfm_proxy.long_name = LONG_NAME - - def time_get_standard_name(self): - self.cfm_proxy.standard_name - - def time_set_standard_name(self): - self.cfm_proxy.standard_name = STANDARD_NAME - - def time_get_var_name(self): - self.cfm_proxy.var_name - - def time_set_var_name(self): - self.cfm_proxy.var_name = VAR_NAME - - def time_get_units(self): - self.cfm_proxy.units - - def time_set_units(self): - self.cfm_proxy.units = UNITS - - def time_get_attributes(self): - self.cfm_proxy.attributes - - def time_set_attributes(self): - self.cfm_proxy.attributes = ATTRIBUTES - - def time_get_metadata(self): - self.cfm_proxy.metadata - - def time_set_metadata__dict(self): - self.cfm_proxy.metadata = DICT - - def time_set_metadata__tuple(self): - self.cfm_proxy.metadata = TUPLE - - def time_set_metadata__metadata(self): - self.cfm_proxy.metadata = METADATA From b185940004807dce4e9a95b58cfc19adbcd7ef45 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:27:11 +0200 Subject: [PATCH 21/22] where is iris style lock? --- noxfile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index def44ff487e..a90ab3bf304 100644 --- a/noxfile.py +++ b/noxfile.py @@ -129,13 +129,12 @@ def prepare_venv(session: nox.sessions.Session) -> None: """ lockfile = session_lockfile(session) - print(f"prepare_venv: {lockfile}") venv_dir = session.virtualenv.location_name if not venv_populated(session): # environment has been created but packages not yet installed # populate the environment from the lockfile - logger.debug(f"Populating conda env at {venv_dir}") + logger.debug(f"Populating conda env at {venv_dir} using {lockfile}") session.conda_install("--file", str(lockfile)) cache_venv(session) From 3eab7c0f934fac71760f164c17a36350e8a853c0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:51:31 +0200 Subject: [PATCH 22/22] locks in ci folder --- .github/workflows/benchmarks_run.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks_run.yml b/.github/workflows/benchmarks_run.yml index d840ca1a4f3..cd88559c13b 100644 --- a/.github/workflows/benchmarks_run.yml +++ b/.github/workflows/benchmarks_run.yml @@ -36,7 +36,7 @@ jobs: uses: marceloprado/has-changed-path@df1b7a3161b8fb9fd8c90403c66a9e66dfde50cb with: # SEE ALSO .github/labeler.yml . - paths: requirements/locks/*.lock + paths: ci/requirements/locks/*.lock - id: overnight name: Check overnight scenario if: github.event_name != 'pull_request'