diff --git a/CHANGES.md b/CHANGES.md index ef7bdfa1d..53a75db2b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,16 @@ ### titiler.xarray +* use `sel={dim}={method}::{value}` notation to specify selector method instead of `sel_method` query-parameter **breaking change** + + ```python + # before + .../info?url=store.zarr&sel=time=2023-01-01&sel_method=nearest + + # now + .../info?url=store.zarr&sel=time=nearest::2023-01-01 + ``` + * add `/validate` endpoint via `ValidateExtension` extension * add `Latitude` and `Longitude` as compatible spatial dimensions (@abarciauskas-bgse, https://github.com/developmentseed/titiler/pull/1268) diff --git a/dev_notebooks/rendering.ipynb b/dev_notebooks/rendering.ipynb new file mode 100644 index 000000000..a38b3d472 --- /dev/null +++ b/dev_notebooks/rendering.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "78d17219", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/vincentsarago/Dev/Devseed/titiler/.venv/lib/python3.13/site-packages/rasterio/io.py:140: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. 
The identity matrix will be returned.\n", + " rd = DatasetReader(mempath, driver=driver, sharing=sharing, **kwargs)\n" + ] + } + ], + "source": [ + "import numpy\n", + "\n", + "from titiler.core.resources.enums import ImageType\n", + "from titiler.core.utils import render_image\n", + "from matplotlib.pyplot import imshow\n", + "\n", + "from rasterio.io import MemoryFile\n", + "\n", + "from rio_tiler.models import ImageData\n", + "\n", + "# Partial alpha values\n", + "cm = {\n", + " 1: (0, 0, 0, 0),\n", + " 500: (100, 100, 100, 50),\n", + " 1000: (255, 255, 255, 255),\n", + "}\n", + "data = numpy.zeros((1, 256, 256), dtype=\"float32\") + 1\n", + "data[0, 0, 0] = 0\n", + "d = numpy.ma.masked_equal(data, 0)\n", + "d[0, 1:, 1:] = 1\n", + "d[0, 2:, 2:] = 500\n", + "d[0, 3:, 3:] = 1000\n", + "\n", + "img = ImageData(d)\n", + "content, media = render_image(\n", + " img,\n", + " output_format=ImageType.png,\n", + " colormap=cm,\n", + ")\n", + "assert media == \"image/png\"\n", + "\n", + "with MemoryFile(content) as mem:\n", + " with mem.open() as dst:\n", + " data_converted = dst.read()\n", + " assert dst.count == 4\n", + " assert dst.dtypes == (\"uint8\", \"uint8\", \"uint8\", \"uint8\")\n", + " assert data_converted[:, 0, 0].tolist() == [\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " ] # Masked from Original Mask | set to UINT8 (0)\n", + " assert data_converted[:, 1, 1].tolist() == [0, 0, 0, 0] # Masked from CMAP\n", + " assert data_converted[:, 2, 2].tolist() == [\n", + " 100,\n", + " 100,\n", + " 100,\n", + " 50,\n", + " ] # Partially masked from CMAP\n", + " assert data_converted[:, 3, 3].tolist() == [255, 255, 255, 255]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f853aedb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "masked_array(\n", + " data=[[--, 1.0, 1.0, 1.0, 1.0],\n", + " [1.0, 1.0, 1.0, 1.0, 1.0],\n", + " [1.0, 1.0, 500.0, 500.0, 500.0],\n", + " [1.0, 1.0, 500.0, 1000.0, 1000.0],\n", + " [1.0, 1.0, 
500.0, 1000.0, 1000.0]],\n", + " mask=[[ True, False, False, False, False],\n", + " [False, False, False, False, False],\n", + " [False, False, False, False, False],\n", + " [False, False, False, False, False],\n", + " [False, False, False, False, False]],\n", + " fill_value=0.0,\n", + " dtype=float32)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[0, 0:5, 0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "308282ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "imshow(d[0, 0:4, 0:4])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "441b642e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "imshow(data_converted[3, 0:4, 0:4])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "20906022", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 0, 0, 0],\n", + " [ 0, 0, 0, 0],\n", + " [ 0, 0, 50, 50],\n", + " [ 0, 0, 50, 255]], dtype=uint8)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_converted[3, 0:4, 0:4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4274d194", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b34d1bb2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11c3c53a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "titiler (3.13.9)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 3c5d0ad38..75ba1502e 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -93,6 +93,7 @@ nav: - NumpyTile: "examples/notebooks/Working_with_NumpyTile.ipynb" - Algorithm: "examples/notebooks/Working_with_Algorithm.ipynb" - Statistics: "examples/notebooks/Working_with_Statistics.ipynb" + - Xarray: "examples/notebooks/Working_with_Zarr.ipynb" - API: - titiler.core: diff --git a/docs/src/advanced/dependencies.md b/docs/src/advanced/dependencies.md index 555134a8a..4b3839cfe 100644 --- a/docs/src/advanced/dependencies.md +++ b/docs/src/advanced/dependencies.md @@ -1019,7 +1019,6 @@ Define 
options to select a **variable** within a Xarray Dataset. | ------ | ---------- |----------|-------------- | **variable** | Query (str) | Yes | None | **sel** | Query (list of str) | No | None -| **method** | Query (str)| No | None
@@ -1033,15 +1032,7 @@ class XarrayDsParams(DefaultDependency): sel: Annotated[ Optional[List[SelDimStr]], Query( - description="Xarray Indexing using dimension names `{dimension}={value}`.", - ), - ] = None - - method: Annotated[ - Optional[Literal["nearest", "pad", "ffill", "backfill", "bfill"]], - Query( - alias="sel_method", - description="Xarray indexing method to use for inexact matches.", + description="Xarray Indexing using dimension names `{dimension}={value}` or `{dimension}={method}::{value}`.", ), ] = None ``` @@ -1059,7 +1050,6 @@ Combination of `XarrayIOParams` and `XarrayDsParams` | **decode_times** | Query (bool)| No | None | **variable** | Query (str) | Yes | None | **sel** | Query (list of str) | No | None -| **method** | Query (str)| No | None
@@ -1083,7 +1073,6 @@ same as `XarrayParams` but with optional `variable` option. | **decode_times** | Query (bool)| No | None | **variable** | Query (str) | No | None | **sel** | Query (list of str) | No | None -| **method** | Query (str)| No | None
diff --git a/docs/src/examples/notebooks/Working_with_Zarr.ipynb b/docs/src/examples/notebooks/Working_with_Zarr.ipynb new file mode 100644 index 000000000..f1939df86 --- /dev/null +++ b/docs/src/examples/notebooks/Working_with_Zarr.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Working with Zarr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Intro\n", + "\n", + "`titiler.xarray` is a submodule designed specifically for working with multidimensional datasets. With version `0.25.0`, we've introduced a default application that only supports Zarr datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-06T14:25:40.161502Z", + "start_time": "2023-04-06T14:25:40.153667Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# setup\n", + "import httpx\n", + "import json\n", + "from IPython.display import Image\n", + "\n", + "# Developmentseed Demo endpoint. Please be kind. 
Ref: https://github.com/developmentseed/titiler/discussions/1223\n", + "# titiler_endpoint = \"https://xarray.titiler.xyz\"\n", + "\n", + "# Or launch your own local instance with:\n", + "# uv run --group server uvicorn titiler.xarray.main:app --host 127.0.0.1 --port 8080 --reload\n", + "titiler_endpoint = \"http://127.0.0.1:8080\"\n", + "\n", + "zarr_url = \"https://nasa-power.s3.us-west-2.amazonaws.com/syn1deg/temporal/power_syn1deg_monthly_temporal_lst.zarr\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset Metadata\n", + "\n", + "The `/dataset/dict` endpoint returns general metadata about the Zarr Dataset\n", + "\n", + "Endpoint: `/dataset/dict`\n", + "\n", + "QueryParams:\n", + "- **url**: Zarr store URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-06T14:25:42.410135Z", + "start_time": "2023-04-06T14:25:42.355858Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/dataset/dict\",\n", + " params={\n", + " \"url\": zarr_url,\n", + " },\n", + ").json()\n", + "\n", + "print(json.dumps(r, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### List of available variables\n", + "\n", + "Endpoint: `/dataset/keys`\n", + "\n", + "QueryParams:\n", + "- **url**: Zarr store URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/dataset/keys\",\n", + " params={\n", + " \"url\": zarr_url,\n", + " },\n", + ").json()\n", + "\n", + "print(json.dumps(r, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variable Info\n", + "\n", + "We can use `/info` endpoint to get more `Geo` information about a specific variable.\n", + "\n", + "QueryParams:\n", + "- **url**: Zarr store URL\n", + "- **variable**: 
Variable's name (e.g. `AIRMASS`, found in `/dataset/keys` response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/info\",\n", + " params={\"url\": zarr_url, \"variable\": \"AIRMASS\"},\n", + ").json()\n", + "\n", + "print(json.dumps(r, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or as a GeoJSON feature" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/info.geojson\",\n", + " params={\"url\": zarr_url, \"variable\": \"AIRMASS\"},\n", + ").json()\n", + "\n", + "print(json.dumps(r, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Knowledge\n", + "\n", + "Looking at the `info` response we can see that the `AIRMASS` variable has `348` (count) bands, each one corresponding to a specific `TIME` (day).\n", + "\n", + "We can also see that the data is stored as `float32` which means that we will have to apply linear rescaling in order to get an output image as PNG/JPEG.\n", + "\n", + "The `min/max` values are also indicated with `valid_max=31.73` and `valid_min=1.0`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dimension Reduction\n", + "\n", + "We cannot visualize all the `bands` at once, so we need to perform dimension reduction to go from an array of shape (348, 360, 180) to a 1b (1, 360, 180) or 3b (3, 360, 180) image. 
\n", + "\n", + "To do it, we have two methods within `titiler.xarray`:\n", + "- using `bidx=`: same as for COG we can select a band index\n", + "- using `sel={dimension}={value}`: which will use the xarray `.sel` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/bbox/-180,-90,180,90.png\",\n", + " params=(\n", + " (\"url\", zarr_url),\n", + " (\"variable\", \"AIRMASS\"),\n", + " # Select 1 specific band\n", + " (\"bidx\", 50),\n", + " (\"rescale\", \"1,20\"),\n", + " (\"colormap_name\", \"viridis\"),\n", + " ),\n", + " timeout=10,\n", + ")\n", + "\n", + "Image(r.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/bbox/-180,-90,180,90.png\",\n", + " params=(\n", + " (\"url\", zarr_url),\n", + " (\"variable\", \"AIRMASS\"),\n", + " # Select 1 specific time slice\n", + " (\"sel\", \"time=2003-06-30\"),\n", + " (\"rescale\", \"1,20\"),\n", + " (\"colormap_name\", \"viridis\"),\n", + " ),\n", + " timeout=10,\n", + ")\n", + "\n", + "Image(r.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = httpx.get(\n", + " f\"{titiler_endpoint}/bbox/-180,-90,180,90.png\",\n", + " params=(\n", + " (\"url\", zarr_url),\n", + " (\"variable\", \"AIRMASS\"),\n", + " # Select 3 specific time slices to create a 3 band image\n", + " (\"sel\", \"time=2003-06-30\"),\n", + " (\"sel\", \"time=2004-06-30\"),\n", + " (\"sel\", \"time=2005-06-30\"),\n", + " (\"rescale\", \"1,10\"),\n", + " ),\n", + " timeout=10,\n", + ")\n", + "\n", + "Image(r.content)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3.13 (3.13.7)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/src/titiler/xarray/tests/test_dependencies.py b/src/titiler/xarray/tests/test_dependencies.py index 1453860c9..01fd73d8a 100644 --- a/src/titiler/xarray/tests/test_dependencies.py +++ b/src/titiler/xarray/tests/test_dependencies.py @@ -1,8 +1,6 @@ """test dependencies.""" -from typing import Annotated - -from fastapi import Depends, FastAPI, Path +from fastapi import Depends, FastAPI from starlette.testclient import TestClient from titiler.xarray import dependencies @@ -12,65 +10,50 @@ def test_xarray_tile(): """Create App.""" app = FastAPI() - @app.get("/tiles/{z}/{x}/{y}") - def tiles( - z: Annotated[ - int, - Path( - description="Identifier (Z) selecting one of the scales defined in the TileMatrixSet and representing the scaleDenominator the tile.", - ), - ], - x: Annotated[ - int, - Path( - description="Column (X) index of the tile on the selected TileMatrix. It cannot exceed the MatrixHeight-1 for the selected TileMatrix.", - ), - ], - y: Annotated[ - int, - Path( - description="Row (Y) index of the tile on the selected TileMatrix. 
It cannot exceed the MatrixWidth-1 for the selected TileMatrix.", - ), - ], + @app.get("/") + def endpoint( params=Depends(dependencies.CompatXarrayParams), ): """return params.""" return params.as_dict() with TestClient(app) as client: - response = client.get("/tiles/1/2/3") + response = client.get("/") params = response.json() assert params == {} - response = client.get("/tiles/1/2/3", params={"variable": "yo"}) + response = client.get("/", params={"variable": "yo"}) params = response.json() assert params == {"variable": "yo"} - response = client.get("/tiles/1/2/3", params={"sel": "yo=yo"}) + response = client.get("/", params={"sel": "yo=yo"}) params = response.json() assert params == {"sel": ["yo=yo"]} - response = client.get("/tiles/1/2/3", params={"sel": "yo=1.0"}) + response = client.get("/", params={"sel": "yo=1.0"}) params = response.json() assert params == {"sel": ["yo=1.0"]} - response = client.get("/tiles/1/2/3", params={"sel": ["yo=yo", "ye=ye"]}) + response = client.get("/", params={"sel": ["yo=yo", "ye=ye"]}) params = response.json() assert params == {"sel": ["yo=yo", "ye=ye"]} - response = client.get("/tiles/1/2/3?sel=yo=yo&sel=ye=ye") + response = client.get("/?sel=yo=yo&sel=ye=ye") params = response.json() assert params == {"sel": ["yo=yo", "ye=ye"]} - response = client.get("/tiles/1/2/3", params={"sel": "yo"}) + response = client.get("/", params={"sel": "yo"}) + assert response.status_code == 422 + + response = client.get("/", params={"sel": "=yo"}) assert response.status_code == 422 - response = client.get("/tiles/1/2/3", params={"sel": "=yo"}) + response = client.get("/", params={"sel": "yo="}) assert response.status_code == 422 - response = client.get("/tiles/1/2/3", params={"sel": "yo="}) + response = client.get("/", params={"sel": "time=near::2023-01-01"}) assert response.status_code == 422 - response = client.get("/tiles/1/2/3", params={"sel_method": "nearest"}) + response = client.get("/", params={"sel": ["yo=nearest::yo", "ye=ye"]}) params 
= response.json() - assert params == {"method": "nearest"} + assert params == {"sel": ["yo=nearest::yo", "ye=ye"]} diff --git a/src/titiler/xarray/tests/test_factory.py b/src/titiler/xarray/tests/test_factory.py index d760cbe7a..a09e38439 100644 --- a/src/titiler/xarray/tests/test_factory.py +++ b/src/titiler/xarray/tests/test_factory.py @@ -175,8 +175,7 @@ def test_info_da_options(app): params={ "url": dataset_4d_nc, "variable": "dataset", - "sel": "z=1", - "sel_method": "nearest", + "sel": "z=nearest::1", }, ) assert resp.status_code == 200 diff --git a/src/titiler/xarray/tests/test_io_tools.py b/src/titiler/xarray/tests/test_io_tools.py index bc0ae063e..24d98fbf6 100644 --- a/src/titiler/xarray/tests/test_io_tools.py +++ b/src/titiler/xarray/tests/test_io_tools.py @@ -9,7 +9,13 @@ import pytest import xarray -from titiler.xarray.io import Reader, fs_open_dataset, get_variable, open_zarr +from titiler.xarray.io import ( + Reader, + _parse_dsl, + fs_open_dataset, + get_variable, + open_zarr, +) prefix = os.path.join(os.path.dirname(__file__), "fixtures") @@ -53,8 +59,7 @@ def test_get_variable(): da = get_variable( ds, "dataset", - sel=["time=2022-12-01", "time=2023-01-01"], - method="nearest", + sel=["time=nearest::2022-12-01", "time=nearest::2023-01-01"], ) assert da.rio.crs assert da.dims == ("time", "y", "x") @@ -70,7 +75,7 @@ def test_get_variable(): assert da["time"][1] == numpy.datetime64("2023-01-01") # Select the Nearest Time - da = get_variable(ds, "dataset", sel=["time=2024-01-01T01:00:00"], method="nearest") + da = get_variable(ds, "dataset", sel=["time=nearest::2024-01-01T01:00:00"]) assert da.rio.crs assert da.dims == ("y", "x") assert da["time"] == numpy.datetime64("2023-01-01") @@ -186,20 +191,20 @@ def test_get_variable_datetime_tz(): assert data.dims == ("time", "y", "x") ds = data.to_dataset(name="dataset") - da = get_variable(ds, "dataset", sel=["time=2023-01-01T00:00:00"], method="nearest") + da = get_variable(ds, "dataset", 
sel=["time=nearest::2023-01-01T00:00:00"]) assert da.rio.crs assert da.dims == ("y", "x") assert da["time"] == numpy.datetime64("2023-01-01") - da = get_variable( - ds, "dataset", sel=["time=2023-01-01T00:00:00Z"], method="nearest" - ) + da = get_variable(ds, "dataset", sel=["time=nearest::2023-01-01T00:00:00Z"]) assert da.rio.crs assert da.dims == ("y", "x") assert da["time"] == numpy.datetime64("2023-01-01") da = get_variable( - ds, "dataset", sel=["time=2023-01-01T00:00:00+03:00"], method="nearest" + ds, + "dataset", + sel=["time=nearest::2023-01-01T00:00:00+03:00"], ) assert da.rio.crs assert da.dims == ("y", "x") @@ -346,3 +351,50 @@ def test_io_open_zarr(src_path, options): """test open_zarr with cloud hosted files.""" with open_zarr(src_path, **options) as ds: assert list(ds.data_vars) + + +@pytest.mark.parametrize( + "sel,expected", + [ + ( + ["time=2022-01-01", "level=10"], + [ + {"dimension": "time", "values": ["2022-01-01"], "method": None}, + {"dimension": "level", "values": ["10"], "method": None}, + ], + ), + ( + ["time=2022-01-01", "time=2022-01-02"], + [ + { + "dimension": "time", + "values": ["2022-01-01", "2022-01-02"], + "method": None, + }, + ], + ), + ( + ["time=pad::2022-01-01", "time=2022-01-02", "level=nearest::10"], + [ + { + "dimension": "time", + "values": ["2022-01-01", "2022-01-02"], + "method": "pad", + }, + {"dimension": "level", "values": ["10"], "method": "nearest"}, + ], + ), + ([], []), + ], +) +def test_parse_dsl(sel, expected): + """test _parse_dsl function.""" + result = _parse_dsl(sel) + assert result == expected + + +def test_parse_dsl_invalid(): + """Should raise a ValueError when multiple methods are set for a dimension.""" + sel = ["time=pad::2022-01-01", "time=nearest::2022-01-02"] + with pytest.raises(ValueError): + _parse_dsl(sel) diff --git a/src/titiler/xarray/titiler/xarray/dependencies.py b/src/titiler/xarray/titiler/xarray/dependencies.py index 758a2ffb7..3370e27ba 100644 --- 
a/src/titiler/xarray/titiler/xarray/dependencies.py +++ b/src/titiler/xarray/titiler/xarray/dependencies.py @@ -1,7 +1,7 @@ """titiler.xarray dependencies.""" from dataclasses import dataclass -from typing import Annotated, List, Literal, Optional, Union +from typing import Annotated, List, Optional, Union import numpy from fastapi import Query @@ -32,7 +32,12 @@ class XarrayIOParams(DefaultDependency): ] = None -SelDimStr = Annotated[str, StringConstraints(pattern=r"^[^=]+=[^=]+$")] +SelDimStr = Annotated[ + str, + StringConstraints( + pattern=r"^[^=]+=((nearest|pad|ffill|backfill|bfill)::)?[^=::]+$" + ), +] @dataclass @@ -44,15 +49,7 @@ class XarrayDsParams(DefaultDependency): sel: Annotated[ Optional[List[SelDimStr]], Query( - description="Xarray Indexing using dimension names `{dimension}={value}`.", - ), - ] = None - - method: Annotated[ - Optional[Literal["nearest", "pad", "ffill", "backfill", "bfill"]], - Query( - alias="sel_method", - description="Xarray indexing method to use for inexact matches.", + description="Xarray Indexing using dimension names `{dimension}={value}` or `{dimension}={method}::{value}`.", ), ] = None @@ -80,15 +77,7 @@ class CompatXarrayParams(XarrayIOParams): sel: Annotated[ Optional[List[SelDimStr]], Query( - description="Xarray Indexing using dimension names `{dimension}={value}`.", - ), - ] = None - - method: Annotated[ - Optional[Literal["nearest", "pad", "ffill", "backfill", "bfill"]], - Query( - alias="sel_method", - description="Xarray indexing method to use for inexact matches.", + description="Xarray Indexing using dimension names `{dimension}={value}` or `{dimension}={method}::{value}`.", ), ] = None diff --git a/src/titiler/xarray/titiler/xarray/io.py b/src/titiler/xarray/titiler/xarray/io.py index c03d903c0..971767e8e 100644 --- a/src/titiler/xarray/titiler/xarray/io.py +++ b/src/titiler/xarray/titiler/xarray/io.py @@ -17,6 +17,7 @@ from morecantile import TileMatrixSet from rio_tiler.constants import WEB_MERCATOR_TMS from 
rio_tiler.io.xarray import XarrayReader +from typing_extensions import TypedDict from zarr.storage import ObjectStore X_DIM_NAMES = ["lon", "longitude", "LON", "LONGITUDE", "Lon", "Longitude"] @@ -139,11 +140,68 @@ def _arrange_dims(da: xarray.DataArray) -> xarray.DataArray: return da +class selector(TypedDict): + """Selector for one dimension (dimension name, values and optional method).""" + + dimension: str + values: list[Any] + method: Literal["nearest", "pad", "ffill", "backfill", "bfill"] | None + + +def _parse_dsl(sel: list[str] | None) -> list[selector]: + """Parse sel DSL into a list of selectors. + + Args: + sel (list of str, optional): List of Xarray Indexes. + + Returns: + list: list of dimension/values/method. + + """ + sel = sel or [] + + _idx: Dict[str, List] = {} + for s in sel: + val: Union[str, slice] + dim, val = s.split("=") + + if dim in _idx: + _idx[dim].append(val) + else: + _idx[dim] = [val] + + # Loop through all dimension=values selectors + # - parse method::value if provided + # - check if multiple methods are provided for the same dimension + # - (casting values to the dimension dtype is done in get_variable) + # - (applying the selection is done in get_variable) + selectors: list[selector] = [] + for dimension, values in _idx.items(): + methods, values = zip( # type: ignore + *[v.split("::", 1) if "::" in v else (None, v) for v in values] + ) + method_sets = {m for m in methods if m is not None} + if len(method_sets) > 1: + raise ValueError( + f"Multiple selection methods provided for dimension {dimension}: {methods}" + ) + method = method_sets.pop() if method_sets else None + + selectors.append( + { + "dimension": dimension, + "values": list(values), + "method": method, + } + ) + + return selectors + + def get_variable( ds: xarray.Dataset, variable: str, sel: Optional[List[str]] = None, - method: Optional[Literal["nearest", "pad", "ffill", "backfill", "bfill"]] = None, ) -> xarray.DataArray: """Get Xarray variable as DataArray. 
@@ -159,23 +217,20 @@ def get_variable( """ da = ds[variable] - if sel: - _idx: Dict[str, List] = {} - for s in sel: - val: Union[str, slice] - dim, val = s.split("=") + for selector in _parse_dsl(sel): + dimension = selector["dimension"] + values = selector["values"] + method = selector["method"] - # cast string to dtype of the dimension - if da[dim].dtype != "O": - val = da[dim].dtype.type(val) + # TODO: add more casting + # cast string to dtype of the dimension + if da[dimension].dtype != "O": + values = [da[dimension].dtype.type(v) for v in values] - if dim in _idx: - _idx[dim].append(val) - else: - _idx[dim] = [val] - - sel_idx = {k: v[0] if len(v) < 2 else v for k, v in _idx.items()} - da = da.sel(sel_idx, method=method) + da = da.sel( + {dimension: values[0] if len(values) < 2 else values}, + method=method, + ) da = _arrange_dims(da) @@ -235,7 +290,6 @@ def __attrs_post_init__(self): self.ds, self.variable, sel=self.sel, - method=self.method, ) super().__attrs_post_init__()