diff --git a/docs/release.rst b/docs/release.rst index bda2f867..cd25fd93 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -25,6 +25,8 @@ Improvements * In ``vlen``, define and use ``const`` ``HEADER_LENGTH``. By :user:`John Kirkham <jakirkham>`, :issue:`723` +* All codecs are now pickleable. + By :user:`Tom Nicholas <TomNicholas>`, :issue:`744` Fixes ~~~~~ diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 0d8ecc74..8e2d1264 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -1,10 +1,13 @@ from __future__ import annotations +import pickle from typing import TYPE_CHECKING import numpy as np import pytest +import numcodecs.bitround + if TYPE_CHECKING: # pragma: no cover import zarr else: @@ -260,7 +263,7 @@ def test_delta_astype(store: StorePath): dtype=data.dtype, fill_value=0, filters=[ - numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] + numcodecs.zarr3.Delta(dtype="i8", astype="i2"), ], ) @@ -277,3 +280,39 @@ def test_repr(): def test_to_dict(): codec = numcodecs.zarr3.LZ4(level=5) assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} + + +@pytest.mark.parametrize( + "codec_cls", + [ + numcodecs.zarr3.Blosc, + numcodecs.zarr3.LZ4, + numcodecs.zarr3.Zstd, + numcodecs.zarr3.Zlib, + numcodecs.zarr3.GZip, + numcodecs.zarr3.BZ2, + numcodecs.zarr3.LZMA, + numcodecs.zarr3.Shuffle, + numcodecs.zarr3.BitRound, + numcodecs.zarr3.Delta, + numcodecs.zarr3.FixedScaleOffset, + numcodecs.zarr3.Quantize, + numcodecs.zarr3.PackBits, + numcodecs.zarr3.AsType, + numcodecs.zarr3.CRC32, + numcodecs.zarr3.CRC32C, + numcodecs.zarr3.Adler32, + numcodecs.zarr3.Fletcher32, + numcodecs.zarr3.JenkinsLookup3, + numcodecs.zarr3.PCodec, + numcodecs.zarr3.ZFPY, + ], +) +def test_codecs_pickleable(codec_cls): + codec = codec_cls() + + expected = codec + + p = pickle.dumps(codec) + actual = pickle.loads(p) + assert actual == expected diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 
43684c3d..595fa9fd 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -28,8 +28,8 @@ import asyncio import math from dataclasses import dataclass, replace -from functools import cached_property, partial -from typing import Any, Self, TypeVar +from functools import cached_property +from typing import Any, Self from warnings import warn import numpy as np @@ -79,6 +79,18 @@ class _NumcodecsCodec(Metadata): codec_name: str codec_config: dict[str, JSON] + def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): + """To be used only when creating the actual public-facing codec class.""" + super().__init_subclass__(**kwargs) + if codec_name is not None: + namespace = codec_name + + cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" + cls.codec_name = f"{CODEC_PREFIX}{namespace}" + cls.__doc__ = f""" + See :class:`{cls_name}` for more details and parameters. + """ + def __init__(self, **codec_config: JSON) -> None: if not self.codec_name: raise ValueError( @@ -180,96 +192,36 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) return chunk_spec.prototype.buffer.from_bytes(out) -T = TypeVar("T", bound=_NumcodecsCodec) - - -def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]: - cls.__doc__ = f""" - See :class:`{ref_class_name}` for more details and parameters. 
- """ - return cls - - -def _add_docstring_wrapper(ref_class_name: str) -> partial: - return partial(_add_docstring, ref_class_name=ref_class_name) - - -def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _Codec(_NumcodecsBytesBytesCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - _Codec.__name__ = cls_name - return _Codec - - -def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _Codec(_NumcodecsArrayArrayCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - _Codec.__name__ = cls_name - return _Codec - - -def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name +# bytes-to-bytes codecs +class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): + pass - class _Codec(_NumcodecsArrayBytesCodec): - codec_name = _codec_name - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) +class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"): + pass - _Codec.__name__ = cls_name - return _Codec +class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"): + pass -def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - class _ChecksumCodec(_NumcodecsBytesBytesCodec): - codec_name = _codec_name +class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"): + pass - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: - return 
input_byte_length + 4 # pragma: no cover +class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"): + pass - _ChecksumCodec.__name__ = cls_name - return _ChecksumCodec +class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"): + pass -# bytes-to-bytes codecs -Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc") -LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4") -Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd") -Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") -GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip") -BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2") -LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA") +class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): + pass -@_add_docstring_wrapper("numcodecs.shuffle.Shuffle") -class Shuffle(_NumcodecsBytesBytesCodec): - codec_name = f"{CODEC_PREFIX}shuffle" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) +class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: if self.codec_config.get("elementsize", None) is None: return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) @@ -277,31 +229,18 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: # array-to-array codecs ("filters") -@_add_docstring_wrapper("numcodecs.delta.Delta") -class Delta(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}delta" - - def __init__(self, **codec_config: dict[str, JSON]) -> None: - super().__init__(**codec_config) - +class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): return replace(chunk_spec, 
dtype=np.dtype(astype)) # type: ignore[call-overload] return chunk_spec -BitRound = _add_docstring( - _make_array_array_codec("bitround", "BitRound"), "numcodecs.bitround.BitRound" -) - +class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): + pass -@_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset") -class FixedScaleOffset(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}fixedscaleoffset" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) +class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] @@ -313,10 +252,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: return self -@_add_docstring_wrapper("numcodecs.quantize.Quantize") -class Quantize(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}quantize" - +class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) @@ -326,13 +262,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: return self -@_add_docstring_wrapper("numcodecs.packbits.PackBits") -class PackBits(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}packbits" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace( chunk_spec, @@ -345,13 +275,7 @@ def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: raise ValueError(f"Packbits filter requires bool dtype. 
Got {dtype}.") -@_add_docstring_wrapper("numcodecs.astype.AsType") -class AsType(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}astype" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] @@ -362,19 +286,39 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: # bytes-to-bytes checksum codecs -CRC32 = _add_docstring(_make_checksum_codec("crc32", "CRC32"), "numcodecs.checksum32.CRC32") -CRC32C = _add_docstring(_make_checksum_codec("crc32c", "CRC32C"), "numcodecs.checksum32.CRC32C") -Adler32 = _add_docstring(_make_checksum_codec("adler32", "Adler32"), "numcodecs.checksum32.Adler32") -Fletcher32 = _add_docstring( - _make_checksum_codec("fletcher32", "Fletcher32"), "numcodecs.fletcher32.Fletcher32" -) -JenkinsLookup3 = _add_docstring( - _make_checksum_codec("jenkins_lookup3", "JenkinsLookup3"), "numcodecs.checksum32.JenkinsLookup3" -) +class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): + def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: + return input_byte_length + 4 # pragma: no cover + + +class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"): + pass + + +class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"): + pass + + +class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"): + pass + + +class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"): + pass + + +class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): + pass + # array-to-bytes codecs -PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec") -ZFPY = _add_docstring(_make_array_bytes_codec("zfpy", "ZFPY"), "numcodecs.zfpy.ZFPY") +class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"): + 
pass + + +class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"): + pass + __all__ = [ "BZ2",