diff --git a/.github/workflows/ci-i386.yml b/.github/workflows/ci-i386.yml index 1cf1f20f..34885e24 100644 --- a/.github/workflows/ci-i386.yml +++ b/.github/workflows/ci-i386.yml @@ -46,15 +46,6 @@ jobs: uv pip install -v -e .[test,test_extras,msgpack,crc32c] shell: alpine.sh {0} - - - name: Install zarr-python - # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out - # so we can have some tests of our minimum version of numpy (1.24) - if: matrix.python-version != '3.11' - run: uv add zarr>=3 - shell: alpine.sh {0} - - - name: List installed packages run: uv pip list shell: alpine.sh {0} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7eafcf43..50cd113e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -52,12 +52,6 @@ jobs: export DISABLE_NUMCODECS_AVX2="" python -m pip install -v -e .[test,test_extras,msgpack,crc32c,pcodec,zfpy] - - name: Install zarr-python - # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out - # so we can have some tests of our minimum version of numpy (1.24) - if: matrix.python-version != '3.11' - run: python -m pip install zarr>=3 - - name: List installed packages run: python -m pip list @@ -75,6 +69,8 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false + matrix: + zarr-version: ["312", "313"] defaults: run: @@ -93,10 +89,10 @@ jobs: pixi-version: v0.49.0 cache: false - - name: List deps + - name: List installed packages shell: "bash -l {0}" - run: pixi run -e default hatch run test:list-deps + run: pixi run ls-deps-${{matrix.zarr-version}} - - name: Run tests + - name: Run tests with Zarr ${{ matrix.zarr-version }} shell: "bash -l {0}" - run: pixi run -e default hatch run test:test-zarr \ No newline at end of file + run: pixi run test-zarr-${{ matrix.zarr-version }} \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c08b8f8a..2de0b1f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,4 
+30,4 @@ repos: hooks: - id: mypy args: [--config-file, pyproject.toml] - additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3'] + additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.1.3'] diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 6102939e..9aac0e3c 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -1,318 +1,48 @@ from __future__ import annotations -import pickle from typing import TYPE_CHECKING -import numpy as np import pytest -import numcodecs.bitround - if TYPE_CHECKING: # pragma: no cover import zarr else: - zarr = pytest.importorskip("zarr") - -import zarr.storage -from zarr.core.common import JSON - -import numcodecs.zarr3 - -pytestmark = [ - pytest.mark.skipif(zarr.__version__ < "3.0.0", reason="zarr 3.0.0 or later is required"), - pytest.mark.filterwarnings("ignore:Codec 'numcodecs.*' not configured in config.*:UserWarning"), - pytest.mark.filterwarnings( - "ignore:Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations." 
- ), + zarr = pytest.importorskip("zarr", "3.1.3") + +import numcodecs.zarr3 as zarr3 + +codec_names = [ + "BZ2", + "CRC32", + "CRC32C", + "LZ4", + "LZMA", + "ZFPY", + "Adler32", + "AsType", + "BitRound", + "Blosc", + "Delta", + "FixedScaleOffset", + "Fletcher32", + "GZip", + "JenkinsLookup3", + "PCodec", + "PackBits", + "Quantize", + "Shuffle", + "Zlib", + "Zstd", ] -get_codec_class = zarr.registry.get_codec_class -Array = zarr.Array -BytesCodec = zarr.codecs.BytesCodec -Store = zarr.abc.store.Store -MemoryStore = zarr.storage.MemoryStore -StorePath = zarr.storage.StorePath - - -EXPECTED_WARNING_STR = "Numcodecs codecs are not in the Zarr version 3.*" - - -@pytest.fixture -def store() -> StorePath: - return StorePath(MemoryStore(read_only=False)) - - -ALL_CODECS = [getattr(numcodecs.zarr3, cls_name) for cls_name in numcodecs.zarr3.__all__] - - -@pytest.mark.parametrize("codec_class", ALL_CODECS) -def test_entry_points(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): - codec_name = codec_class.codec_name - assert get_codec_class(codec_name) == codec_class - - -@pytest.mark.parametrize("codec_class", ALL_CODECS) -def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): - assert "See :class:`numcodecs." 
in codec_class.__doc__ # type: ignore[operator] - - -@pytest.mark.parametrize( - "codec_class", - [ - numcodecs.zarr3.Blosc, - numcodecs.zarr3.LZ4, - numcodecs.zarr3.Zstd, - numcodecs.zarr3.Zlib, - numcodecs.zarr3.GZip, - numcodecs.zarr3.BZ2, - numcodecs.zarr3.LZMA, - numcodecs.zarr3.Shuffle, - ], -) -def test_generic_compressor( - store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsBytesBytesCodec] -): - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - compressors=[codec_class()], - ) - - a[:, :] = data.copy() - np.testing.assert_array_equal(data, a[:, :]) - - -@pytest.mark.parametrize( - ("codec_class", "codec_config"), - [ - (numcodecs.zarr3.Delta, {"dtype": "float32"}), - (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 25.5}), - (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}), - (numcodecs.zarr3.AsType, {"encode_dtype": "float32", "decode_dtype": "float32"}), - ], - ids=[ - "delta", - "fixedscaleoffset", - "fixedscaleoffset2", - "astype", - ], -) -def test_generic_filter( - store: StorePath, - codec_class: type[numcodecs.zarr3._NumcodecsArrayArrayCodec], - codec_config: dict[str, JSON], -): - data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - filters=[ - codec_class(**codec_config), - ], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic", mode="r") - np.testing.assert_array_equal(data, a[:, :]) - - -def test_generic_filter_bitround(store: StorePath): - data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = 
zarr.create_array( - store / "generic_bitround", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - filters=[numcodecs.zarr3.BitRound(keepbits=3)], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic_bitround", mode="r") - assert np.allclose(data, a[:, :], atol=0.1) - - -def test_generic_filter_quantize(store: StorePath): - data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic_quantize", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - filters=[numcodecs.zarr3.Quantize(digits=3)], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic_quantize", mode="r") - assert np.allclose(data, a[:, :], atol=0.001) - - -def test_generic_filter_packbits(store: StorePath): - data = np.zeros((16, 16), dtype="bool") - data[0:4, :] = True - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic_packbits", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - filters=[numcodecs.zarr3.PackBits()], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic_packbits", mode="r") - np.testing.assert_array_equal(data, a[:, :]) - - with pytest.raises(ValueError, match=".*requires bool dtype.*"): - zarr.create_array( - store / "generic_packbits_err", - shape=data.shape, - chunks=(16, 16), - dtype="uint32", - fill_value=0, - filters=[numcodecs.zarr3.PackBits()], - ) - - -@pytest.mark.parametrize( - "codec_class", - [ - numcodecs.zarr3.CRC32, - numcodecs.zarr3.CRC32C, - numcodecs.zarr3.Adler32, - numcodecs.zarr3.Fletcher32, - numcodecs.zarr3.JenkinsLookup3, - ], -) -def test_generic_checksum( - store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsBytesBytesCodec] -): - data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a 
= zarr.create_array( - store / "generic_checksum", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - compressors=[codec_class()], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic_checksum", mode="r") - np.testing.assert_array_equal(data, a[:, :]) - - -@pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY]) -def test_generic_bytes_codec( - store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsArrayBytesCodec] -): - try: - codec_class()._codec # noqa: B018 - except ValueError as e: # pragma: no cover - if "codec not available" in str(e): - pytest.xfail(f"{codec_class.codec_name} is not available: {e}") - else: - raise - except ImportError as e: # pragma: no cover - pytest.xfail(f"{codec_class.codec_name} is not available: {e}") - - data = np.arange(0, 256, dtype="float32").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - serializer=codec_class(), - ) - - a[:, :] = data.copy() - np.testing.assert_array_equal(data, a[:, :]) - - -def test_delta_astype(store: StorePath): - data = np.linspace(0, 10, 256, dtype="i8").reshape((16, 16)) - - with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR): - a = zarr.create_array( - store / "generic", - shape=data.shape, - chunks=(16, 16), - dtype=data.dtype, - fill_value=0, - filters=[ - numcodecs.zarr3.Delta(dtype="i8", astype="i2"), - ], - ) - - a[:, :] = data.copy() - a = zarr.open_array(store / "generic", mode="r") - np.testing.assert_array_equal(data, a[:, :]) - - -def test_repr(): - codec = numcodecs.zarr3.LZ4(level=5) - assert repr(codec) == "LZ4(codec_name='numcodecs.lz4', codec_config={'level': 5})" - - -def test_to_dict(): - codec = numcodecs.zarr3.LZ4(level=5) - assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} - - -@pytest.mark.parametrize( - 
"codec_cls", - [ - numcodecs.zarr3.Blosc, - numcodecs.zarr3.LZ4, - numcodecs.zarr3.Zstd, - numcodecs.zarr3.Zlib, - numcodecs.zarr3.GZip, - numcodecs.zarr3.BZ2, - numcodecs.zarr3.LZMA, - numcodecs.zarr3.Shuffle, - numcodecs.zarr3.BitRound, - numcodecs.zarr3.Delta, - numcodecs.zarr3.FixedScaleOffset, - numcodecs.zarr3.Quantize, - numcodecs.zarr3.PackBits, - numcodecs.zarr3.AsType, - numcodecs.zarr3.CRC32, - numcodecs.zarr3.CRC32C, - numcodecs.zarr3.Adler32, - numcodecs.zarr3.Fletcher32, - numcodecs.zarr3.JenkinsLookup3, - numcodecs.zarr3.PCodec, - numcodecs.zarr3.ZFPY, - ], -) -def test_codecs_pickleable(codec_cls): - codec = codec_cls() - - expected = codec - p = pickle.dumps(codec) - actual = pickle.loads(p) - assert actual == expected +@pytest.mark.parametrize('codec_name', codec_names) +def test_export(codec_name: str) -> None: + """ + Ensure that numcodecs.zarr3 re-exports codecs defined in zarr.codecs.numcodecs + """ + with pytest.warns( + DeprecationWarning, + match="The numcodecs.zarr3 module is deprecated and will be removed in a future release of numcodecs. ", + ): + assert getattr(zarr3, codec_name) == getattr(zarr.codecs.numcodecs, codec_name) diff --git a/numcodecs/tests/test_zarr3_import.py b/numcodecs/tests/test_zarr3_import.py index 3feaf3e1..7ff8cedc 100644 --- a/numcodecs/tests/test_zarr3_import.py +++ b/numcodecs/tests/test_zarr3_import.py @@ -4,7 +4,7 @@ def test_zarr3_import(): - ERROR_MESSAGE_MATCH = "zarr 3.0.0 or later.*" + ERROR_MESSAGE_MATCH = "Zarr 3.1.3 or later*" try: import zarr # noqa: F401 diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 7ed0ac57..d0a66039 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -1,375 +1,67 @@ """ -This module provides the compatibility for :py:mod:`numcodecs` in Zarr version 3. - -A compatibility module is required because the codec handling in Zarr version 3 is different from Zarr version 2. 
- -You can use codecs from :py:mod:`numcodecs` by constructing codecs from :py:mod:`numcodecs.zarr3` using the same parameters as the original codecs. - ->>> import zarr ->>> import numcodecs.zarr3 ->>> ->>> array = zarr.create_array( -... store="data.zarr", -... shape=(1024, 1024), -... chunks=(64, 64), -... dtype="uint32", -... filters=[numcodecs.zarr3.Delta()], -... compressors=[numcodecs.zarr3.BZ2(level=5)]) ->>> array[:] = np.arange(*array.shape).astype(array.dtype) - -.. note:: - - Please note that the codecs in :py:mod:`numcodecs.zarr3` are not part of the Zarr version 3 specification. - Using these codecs might cause interoperability issues with other Zarr implementations. +This module is DEPRECATED. It will be removed entirely in a future release of Numcodecs. +The codecs exported here are available in Zarr Python >= 3.1.3 """ from __future__ import annotations -import asyncio -import math -from dataclasses import dataclass, replace -from functools import cached_property +import importlib +import warnings from importlib.metadata import version -from typing import Any, Self -from warnings import warn +from typing import Any -import numpy as np from packaging.version import Version -import numcodecs - -try: - import zarr # noqa: F401 - - if Version(version('zarr')) < Version("3.0.0"): # pragma: no cover - raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.") -except ImportError as e: # pragma: no cover - raise ImportError( - "zarr 3.0.0 or later is required to use the numcodecs zarr integration." - ) from e - -from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec -from zarr.abc.metadata import Metadata -from zarr.core.array_spec import ArraySpec -from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer -from zarr.core.buffer.cpu import as_numpy_array_wrapper -from zarr.core.common import JSON, parse_named_configuration, product - -CODEC_PREFIX = "numcodecs." 
- -def _from_zarr_dtype(dtype: Any) -> np.dtype: +def __getattr__(name: str) -> Any: """ - Get a numpy data type from an array spec, depending on the zarr version. + Emit a warning when someone imports from this module """ - if Version(version('zarr')) >= Version("3.1.0"): - return dtype.to_native_dtype() - return dtype # pragma: no cover - - -def _to_zarr_dtype(dtype: np.dtype) -> Any: - if Version(version('zarr')) >= Version("3.1.0"): - from zarr.dtype import parse_data_type - - return parse_data_type(dtype, zarr_format=3) - return dtype # pragma: no cover - - -def _expect_name_prefix(codec_name: str) -> str: - if not codec_name.startswith(CODEC_PREFIX): - raise ValueError( - f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead." - ) # pragma: no cover - return codec_name.removeprefix(CODEC_PREFIX) - - -def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: - parsed_name, parsed_configuration = parse_named_configuration(data) - if not parsed_name.startswith(CODEC_PREFIX): - raise ValueError( - f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead." - ) # pragma: no cover - id = _expect_name_prefix(parsed_name) - return {"id": id, **parsed_configuration} - - -@dataclass(frozen=True) -class _NumcodecsCodec(Metadata): - codec_name: str - codec_config: dict[str, JSON] - - def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): - """To be used only when creating the actual public-facing codec class.""" - super().__init_subclass__(**kwargs) - if codec_name is not None: - namespace = codec_name - - cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" - cls.codec_name = f"{CODEC_PREFIX}{namespace}" - cls.__doc__ = f""" - See :class:`{cls_name}` for more details and parameters. - """ - - def __init__(self, **codec_config: JSON) -> None: - if not self.codec_name: - raise ValueError( - "The codec name needs to be supplied through the `codec_name` attribute." 
- ) # pragma: no cover - unprefixed_codec_name = _expect_name_prefix(self.codec_name) - - if "id" not in codec_config: - codec_config = {"id": unprefixed_codec_name, **codec_config} - elif codec_config["id"] != unprefixed_codec_name: - raise ValueError( - f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}." - ) # pragma: no cover - - object.__setattr__(self, "codec_config", codec_config) - warn( - "Numcodecs codecs are not in the Zarr version 3 specification and " - "may not be supported by other zarr implementations.", - category=UserWarning, - stacklevel=2, - ) - - @cached_property - def _codec(self) -> numcodecs.abc.Codec: - return numcodecs.get_codec(self.codec_config) - - @classmethod - def from_dict(cls, data: dict[str, JSON]) -> Self: - codec_config = _parse_codec_configuration(data) - return cls(**codec_config) - - def to_dict(self) -> dict[str, JSON]: - codec_config = self.codec_config.copy() - codec_config.pop("id", None) - return { - "name": self.codec_name, - "configuration": codec_config, - } - - def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: - raise NotImplementedError # pragma: no cover - - # Override __repr__ because dynamically constructed classes don't seem to work otherwise - def __repr__(self) -> str: - codec_config = self.codec_config.copy() - codec_config.pop("id", None) - return f"{self.__class__.__name__}(codec_name={self.codec_name!r}, codec_config={codec_config!r})" - - -class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec): - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer: - return await asyncio.to_thread( - as_numpy_array_wrapper, - self._codec.decode, - chunk_bytes, - chunk_spec.prototype, + if name in __all__: + msg = ( + "The numcodecs.zarr3 module is deprecated and will be removed in a future release of numcodecs. 
" + f"Import {name} via zarr.codecs.numcodecs.{name} instead. This requires Zarr Python >= 3.1.3. " ) - def _encode(self, chunk_bytes: Buffer, prototype: BufferPrototype) -> Buffer: - encoded = self._codec.encode(chunk_bytes.as_array_like()) - if isinstance(encoded, np.ndarray): # Required for checksum codecs - return prototype.buffer.from_bytes(encoded.tobytes()) - return prototype.buffer.from_bytes(encoded) - - async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer: - return await asyncio.to_thread(self._encode, chunk_bytes, chunk_spec.prototype) - - -class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec): - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - async def _decode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: - chunk_ndarray = chunk_array.as_ndarray_like() - out = await asyncio.to_thread(self._codec.decode, chunk_ndarray) - return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) - - async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: - chunk_ndarray = chunk_array.as_ndarray_like() - out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) - return chunk_spec.prototype.nd_buffer.from_ndarray_like(out) - - -class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec): - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - async def _decode_single(self, chunk_buffer: Buffer, chunk_spec: ArraySpec) -> NDBuffer: - chunk_bytes = chunk_buffer.to_bytes() - out = await asyncio.to_thread(self._codec.decode, chunk_bytes) - return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) - - async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) -> Buffer: - chunk_ndarray = chunk_ndbuffer.as_ndarray_like() - out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) - return 
chunk_spec.prototype.buffer.from_bytes(out) - - -# bytes-to-bytes codecs -class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): - pass - + warnings.warn(msg, DeprecationWarning, stacklevel=2) + module = importlib.import_module("zarr.codecs.numcodecs") + obj = getattr(module, name) + globals()[name] = obj # cache so subsequent lookups skip __getattr__ + return obj + raise AttributeError(f"module {__name__} has no attribute {name}") -class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"): - pass +try: + import zarr # noqa: F401 -class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"): - pass - - -class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"): - pass - - -class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"): - pass - - -class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"): - pass - - -class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): - pass - - -class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: - if self.codec_config.get("elementsize") is None: - dtype = _from_zarr_dtype(array_spec.dtype) - return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize}) - return self # pragma: no cover - - -# array-to-array codecs ("filters") -class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): - def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - if astype := self.codec_config.get("astype"): - dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload] - return replace(chunk_spec, dtype=dtype) - return chunk_spec - - -class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): - pass - - -class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): - def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - if astype := self.codec_config.get("astype"): - dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload] - return replace(chunk_spec, dtype=dtype) - return chunk_spec - - def 
evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: - if self.codec_config.get("dtype") is None: - dtype = _from_zarr_dtype(array_spec.dtype) - return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)}) - return self - - -class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"): - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: - if self.codec_config.get("dtype") is None: - dtype = _from_zarr_dtype(array_spec.dtype) - return Quantize(**{**self.codec_config, "dtype": str(dtype)}) - return self - - -class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"): - def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - return replace( - chunk_spec, - shape=(1 + math.ceil(product(chunk_spec.shape) / 8),), - dtype=_to_zarr_dtype(np.dtype("uint8")), - ) - - # todo: remove this type: ignore when this class can be defined w.r.t. - # a single zarr dtype API - def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: # type: ignore[override] - _dtype = _from_zarr_dtype(dtype) - if _dtype != np.dtype("bool"): - raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.") - - -class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): - def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] - return replace(chunk_spec, dtype=dtype) - - def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: - if self.codec_config.get("decode_dtype") is None: - # TODO: remove these coverage exemptions the correct way, i.e. 
with tests - dtype = _from_zarr_dtype(array_spec.dtype) # pragma: no cover - return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover - return self - - -# bytes-to-bytes checksum codecs -class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): - def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: - return input_byte_length + 4 # pragma: no cover - - -class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"): - pass - - -class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"): - pass - - -class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"): - pass - - -class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"): - pass - - -class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): - pass - - -# array-to-bytes codecs -class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"): - pass - - -class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"): - pass - + zarr_version = version('zarr') + if Version(zarr_version) < Version("3.1.3"): # pragma: no cover + msg = f"Zarr 3.1.3 or later is required to use the numcodecs zarr integration. Got {zarr_version}." + raise ImportError(msg) +except ImportError as e: # pragma: no cover + msg = "zarr could not be imported. Zarr 3.1.3 or later is required to use the numcodecs zarr integration." 
+ raise ImportError(msg) from e __all__ = [ - "BZ2", - "CRC32", - "CRC32C", - "LZ4", - "LZMA", - "ZFPY", - "Adler32", - "AsType", - "BitRound", - "Blosc", - "Delta", - "FixedScaleOffset", - "Fletcher32", - "GZip", - "JenkinsLookup3", - "PCodec", - "PackBits", - "Quantize", - "Shuffle", - "Zlib", - "Zstd", + "BZ2", # noqa: F822 + "CRC32", # noqa: F822 + "CRC32C", # noqa: F822 + "LZ4", # noqa: F822 + "LZMA", # noqa: F822 + "ZFPY", # noqa: F822 + "Adler32", # noqa: F822 + "AsType", # noqa: F822 + "BitRound", # noqa: F822 + "Blosc", # noqa: F822 + "Delta", # noqa: F822 + "FixedScaleOffset", # noqa: F822 + "Fletcher32", # noqa: F822 + "GZip", # noqa: F822 + "JenkinsLookup3", # noqa: F822 + "PCodec", # noqa: F822 + "PackBits", # noqa: F822 + "Quantize", # noqa: F822 + "Shuffle", # noqa: F822 + "Zlib", # noqa: F822 + "Zstd", # noqa: F822 ] diff --git a/pyproject.toml b/pyproject.toml index c925cd17..2df8358c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,21 @@ test = [ test_extras = [ "importlib_metadata", ] + +[dependency-groups] +test-zarr-312 = [ + "pytest", + "pytest-cov", + "zarr==3.1.2", + "crc32c", +] +test-zarr-313 = [ + "pytest", + "pytest-cov", + "zarr==3.1.3", + "crc32c", +] + msgpack = [ "msgpack", ] @@ -75,29 +90,6 @@ crc32c = [ "crc32c>=2.7", ] -[project.entry-points."zarr.codecs"] -"numcodecs.blosc" = "numcodecs.zarr3:Blosc" -"numcodecs.lz4" = "numcodecs.zarr3:LZ4" -"numcodecs.zstd" = "numcodecs.zarr3:Zstd" -"numcodecs.zlib" = "numcodecs.zarr3:Zlib" -"numcodecs.gzip" = "numcodecs.zarr3:GZip" -"numcodecs.bz2" = "numcodecs.zarr3:BZ2" -"numcodecs.lzma" = "numcodecs.zarr3:LZMA" -"numcodecs.shuffle" = "numcodecs.zarr3:Shuffle" -"numcodecs.delta" = "numcodecs.zarr3:Delta" -"numcodecs.bitround" = "numcodecs.zarr3:BitRound" -"numcodecs.fixedscaleoffset" = "numcodecs.zarr3:FixedScaleOffset" -"numcodecs.quantize" = "numcodecs.zarr3:Quantize" -"numcodecs.packbits" = "numcodecs.zarr3:PackBits" -"numcodecs.astype" = "numcodecs.zarr3:AsType" 
-"numcodecs.crc32" = "numcodecs.zarr3:CRC32" -"numcodecs.crc32c" = "numcodecs.zarr3:CRC32C" -"numcodecs.adler32" = "numcodecs.zarr3:Adler32" -"numcodecs.fletcher32" = "numcodecs.zarr3:Fletcher32" -"numcodecs.jenkins_lookup3" = "numcodecs.zarr3:JenkinsLookup3" -"numcodecs.pcodec" = "numcodecs.zarr3:PCodec" -"numcodecs.zfpy" = "numcodecs.zarr3:ZFPY" - [tool.setuptools] package-dir = {"" = "."} packages = ["numcodecs", "numcodecs.tests"] @@ -242,6 +234,15 @@ warn_redundant_casts = true warn_unused_ignores = true warn_unused_configs = true +[tool.uv] +conflicts = [ + # Zarr versions conflict with each other + [ + { group = "test-zarr-312" }, + { group = "test-zarr-313" } + ] +] + [tool.pixi.project] channels = ["conda-forge"] platforms = ["linux-64", "osx-arm64", "osx-64", "win-64"] @@ -250,20 +251,10 @@ platforms = ["linux-64", "osx-arm64", "osx-64", "win-64"] clang = ">=19.1.7,<20" c-compiler = ">=1.9.0,<2" cxx-compiler = ">=1.9.0,<2" -hatch = '==1.14.1' - -[[tool.hatch.envs.test.matrix]] -python = ["3.11"] -zarr = ["3.0.10", "3.1.0"] - -[tool.hatch.envs.test] -dependencies = [ - "zarr=={matrix:zarr}" -] -numpy="==2.2" -features = ["test"] - +uv = "*" -[tool.hatch.envs.test.scripts] -list-deps = "pip list" -test-zarr = "pytest numcodecs/tests/test_zarr3.py numcodecs/tests/test_zarr3_import.py" \ No newline at end of file +[tool.pixi.tasks] +ls-deps-312 = "uv run --group test-zarr-312 uv pip freeze" +ls-deps-313 = "uv run --group test-zarr-313 uv pip freeze" +test-zarr-312 = "uv run --group test-zarr-312 pytest numcodecs/tests/test_zarr3.py numcodecs/tests/test_zarr3_import.py" +test-zarr-313 = "uv run --group test-zarr-313 pytest numcodecs/tests/test_zarr3.py numcodecs/tests/test_zarr3_import.py"