From 80b3a422a73493f5f264662cc634fa5713618511 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Apr 2025 15:37:24 -0400 Subject: [PATCH 01/22] make Zlib codec pickleable --- numcodecs/zarr3.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 43684c3d..1bcc0fcf 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -257,7 +257,16 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc") LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4") Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd") -Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") + + +#Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") +class Zlib(_NumcodecsBytesBytesCodec): + codec_name = CODEC_PREFIX + "zlib" + + def __init__(self, **codec_config: JSON) -> None: + super().__init__(**codec_config) + + GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip") BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2") LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA") From 83826c093a2f99b0855c4aa3b307855ad7e483b0 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 00:06:54 -0400 Subject: [PATCH 02/22] add test --- numcodecs/tests/test_zarr3.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 0d8ecc74..1068bc77 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pickle from typing import TYPE_CHECKING import numpy as np @@ -277,3 +278,31 @@ def test_repr(): def test_to_dict(): codec = 
numcodecs.zarr3.LZ4(level=5) assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} + + +@pytest.mark.parametrize( + "codec_cls", + [ + numcodecs.zarr3.Blosc, + numcodecs.zarr3.LZ4, + numcodecs.zarr3.Zstd, + numcodecs.zarr3.Zlib, + numcodecs.zarr3.GZip, + numcodecs.zarr3.BZ2, + numcodecs.zarr3.LZMA, + numcodecs.zarr3.Shuffle, + ], +) +def test_codecs_pickleable(codec_cls): + codec = codec_cls() + + expected = codec + + p = pickle.dumps(codec) + actual = pickle.loads(p) + assert actual == expected + + print(codec) + print(codec.codec_name) + print(codec.__doc__) + #assert False From eba4a8fd7797413c846b7f7a2463dec53551fb8b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 00:07:24 -0400 Subject: [PATCH 03/22] show __init_subclass__ can work for Zlib --- numcodecs/zarr3.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 1bcc0fcf..a8a4f8df 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -74,11 +74,24 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: return {"id": id, **parsed_configuration} +def snake_case(codec_name: str) -> str: + # TODO the Jenkins codec is a special case because it inserts an _ + return codec_name.lower() + + @dataclass(frozen=True) class _NumcodecsCodec(Metadata): codec_name: str codec_config: dict[str, JSON] + def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): + super().__init_subclass__(**kwargs) + if codec_name is not None: + cls.codec_name = CODEC_PREFIX + codec_name + cls.__doc__ = f""" + See :class:`{snake_case(cls.codec_name)}.{codec_name}` for more details and parameters. 
+ """ + def __init__(self, **codec_config: JSON) -> None: if not self.codec_name: raise ValueError( @@ -260,11 +273,8 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> #Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") -class Zlib(_NumcodecsBytesBytesCodec): - codec_name = CODEC_PREFIX + "zlib" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) +class Zlib(_NumcodecsBytesBytesCodec, codec_name="Zlib"): + pass GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip") From 775608c2428c06c34cbe066af4147f4bd8c90647 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 13:07:31 -0400 Subject: [PATCH 04/22] refactor the BytesBytes Codecs to use __init_subclass__ --- numcodecs/zarr3.py | 58 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index a8a4f8df..5547c1e1 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -31,6 +31,7 @@ from functools import cached_property, partial from typing import Any, Self, TypeVar from warnings import warn +import textwrap import numpy as np @@ -84,12 +85,14 @@ class _NumcodecsCodec(Metadata): codec_name: str codec_config: dict[str, JSON] - def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): + def __init_subclass__(cls, *, namespace: str | None = None, codec_name: str | None = None, **kwargs): + """To be used only when creating the actual public-facing codec class.""" super().__init_subclass__(**kwargs) - if codec_name is not None: - cls.codec_name = CODEC_PREFIX + codec_name + if namespace is not None and codec_name is not None: + cls_name = f"{CODEC_PREFIX}{namespace}.{codec_name}" + cls.codec_name = f"{CODEC_PREFIX}{codec_name}" cls.__doc__ = f""" - See :class:`{snake_case(cls.codec_name)}.{codec_name}` for more details and parameters. 
+ See :class:`{cls_name}` for more details and parameters. """ def __init__(self, **codec_config: JSON) -> None: @@ -197,9 +200,11 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]: - cls.__doc__ = f""" + cls.__doc__ = textwrap.dedent( + f""" See :class:`{ref_class_name}` for more details and parameters. """ + ) return cls @@ -207,20 +212,6 @@ def _add_docstring_wrapper(ref_class_name: str) -> partial: return partial(_add_docstring, ref_class_name=ref_class_name) -def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _Codec(_NumcodecsBytesBytesCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - _Codec.__name__ = cls_name - return _Codec - - def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]: # rename for class scope _codec_name = CODEC_PREFIX + codec_name @@ -267,19 +258,32 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> # bytes-to-bytes codecs -Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc") -LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4") -Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd") +class Blosc(_NumcodecsBytesBytesCodec, namespace="blosc", codec_name="Blosc"): + pass -#Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") -class Zlib(_NumcodecsBytesBytesCodec, codec_name="Zlib"): +class LZ4(_NumcodecsBytesBytesCodec, namespace="lz4", codec_name="LZ4"): pass -GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip") -BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2") -LZMA = 
_add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA") +class Zstd(_NumcodecsBytesBytesCodec, namespace="zstd", codec_name="Zstd"): + pass + + +class Zlib(_NumcodecsBytesBytesCodec, namespace="zlib", codec_name="Zlib"): + pass + + +class GZip(_NumcodecsBytesBytesCodec, namespace="gzip", codec_name="GZip"): + pass + + +class BZ2(_NumcodecsBytesBytesCodec, namespace="bz2", codec_name="BZ2"): + pass + + +class LZMA(_NumcodecsBytesBytesCodec, namespace="lzma",codec_name="LZMA"): + pass @_add_docstring_wrapper("numcodecs.shuffle.Shuffle") From 5304a56e20a40c976f9f6259fb060fcdd224057f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 13:10:46 -0400 Subject: [PATCH 05/22] remove snake_case function --- numcodecs/zarr3.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 5547c1e1..c54951d4 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -75,11 +75,6 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: return {"id": id, **parsed_configuration} -def snake_case(codec_name: str) -> str: - # TODO the Jenkins codec is a special case because it inserts an _ - return codec_name.lower() - - @dataclass(frozen=True) class _NumcodecsCodec(Metadata): codec_name: str From 5ef6a47761ab65c161269859e7e5e1ffba9a4f7b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 13:53:45 -0700 Subject: [PATCH 06/22] Chuck's suggestions Co-authored-by: Chuck Daniels --- numcodecs/zarr3.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index c54951d4..b1bacca6 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -80,11 +80,11 @@ class _NumcodecsCodec(Metadata): codec_name: str codec_config: dict[str, JSON] - def __init_subclass__(cls, *, namespace: str | None = None, codec_name: str | None = None, **kwargs): + def __init_subclass__(cls, *, codec_name: str | None = None, 
**kwargs): """To be used only when creating the actual public-facing codec class.""" super().__init_subclass__(**kwargs) - if namespace is not None and codec_name is not None: - cls_name = f"{CODEC_PREFIX}{namespace}.{codec_name}" + if codec_name is not None: + cls_name = f"{CODEC_PREFIX}{codec_name}.{cls.__name__}" cls.codec_name = f"{CODEC_PREFIX}{codec_name}" cls.__doc__ = f""" See :class:`{cls_name}` for more details and parameters. @@ -253,31 +253,31 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> # bytes-to-bytes codecs -class Blosc(_NumcodecsBytesBytesCodec, namespace="blosc", codec_name="Blosc"): +class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): pass -class LZ4(_NumcodecsBytesBytesCodec, namespace="lz4", codec_name="LZ4"): +class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"): pass -class Zstd(_NumcodecsBytesBytesCodec, namespace="zstd", codec_name="Zstd"): +class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"): pass -class Zlib(_NumcodecsBytesBytesCodec, namespace="zlib", codec_name="Zlib"): +class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"): pass -class GZip(_NumcodecsBytesBytesCodec, namespace="gzip", codec_name="GZip"): +class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"): pass -class BZ2(_NumcodecsBytesBytesCodec, namespace="bz2", codec_name="BZ2"): +class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"): pass -class LZMA(_NumcodecsBytesBytesCodec, namespace="lzma",codec_name="LZMA"): +class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): pass From 037cda431af998552ec8ba2d2eda727d04ec0331 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:13:23 -0400 Subject: [PATCH 07/22] redefine Bitround --- numcodecs/zarr3.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index b1bacca6..b958410b 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -207,20 +207,6 @@ def 
_add_docstring_wrapper(ref_class_name: str) -> partial: return partial(_add_docstring, ref_class_name=ref_class_name) -def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _Codec(_NumcodecsArrayArrayCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - _Codec.__name__ = cls_name - return _Codec - - def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]: # rename for class scope _codec_name = CODEC_PREFIX + codec_name @@ -308,9 +294,8 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return chunk_spec -BitRound = _add_docstring( - _make_array_array_codec("bitround", "BitRound"), "numcodecs.bitround.BitRound" -) +class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): + pass @_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset") From 7e624f4649f5c6eea18223392a5726f3eec4c0f3 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:13:46 -0400 Subject: [PATCH 08/22] remove debugging prints from test --- numcodecs/tests/test_zarr3.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 1068bc77..1394c194 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -3,6 +3,7 @@ import pickle from typing import TYPE_CHECKING +import numcodecs.bitround import numpy as np import pytest @@ -291,6 +292,7 @@ def test_to_dict(): numcodecs.zarr3.BZ2, numcodecs.zarr3.LZMA, numcodecs.zarr3.Shuffle, + numcodecs.zarr3.BitRound, ], ) def test_codecs_pickleable(codec_cls): @@ -301,8 +303,3 @@ def test_codecs_pickleable(codec_cls): p = pickle.dumps(codec) actual = pickle.loads(p) assert actual == expected - - print(codec) - print(codec.codec_name) - print(codec.__doc__) - #assert False From 
6e33e1050e6c7c3c0f6539dfa96341436ff20dca Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:16:14 -0400 Subject: [PATCH 09/22] redefine Shuffle --- numcodecs/zarr3.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index b958410b..059c7e8c 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -267,17 +267,11 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): pass -@_add_docstring_wrapper("numcodecs.shuffle.Shuffle") -class Shuffle(_NumcodecsBytesBytesCodec): - codec_name = f"{CODEC_PREFIX}shuffle" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: - if self.codec_config.get("elementsize", None) is None: - return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) - return self # pragma: no cover + if self.codec_config.get("elementsize", None) is None: + return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) + return self # pragma: no cover # array-to-array codecs ("filters") From b1ede896174f4c52156a53526cfd7a0f875f3571 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:19:33 -0400 Subject: [PATCH 10/22] redefine Delta --- numcodecs/tests/test_zarr3.py | 1 + numcodecs/zarr3.py | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 1394c194..3a27fb9a 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -293,6 +293,7 @@ def test_to_dict(): numcodecs.zarr3.LZMA, numcodecs.zarr3.Shuffle, numcodecs.zarr3.BitRound, + numcodecs.zarr3.Delta, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 059c7e8c..acf70d4e 100644 --- a/numcodecs/zarr3.py +++ 
b/numcodecs/zarr3.py @@ -275,13 +275,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: # array-to-array codecs ("filters") -@_add_docstring_wrapper("numcodecs.delta.Delta") -class Delta(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}delta" - - def __init__(self, **codec_config: dict[str, JSON]) -> None: - super().__init__(**codec_config) - +class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] From 0a6c2c76d4758f03b6e52c76553f83e7bd52c22c Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:24:07 -0400 Subject: [PATCH 11/22] redefine FixedScaleOffset --- numcodecs/tests/test_zarr3.py | 1 + numcodecs/zarr3.py | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 3a27fb9a..5dacf5b6 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -294,6 +294,7 @@ def test_to_dict(): numcodecs.zarr3.Shuffle, numcodecs.zarr3.BitRound, numcodecs.zarr3.Delta, + numcodecs.zarr3.FixedScaleOffset, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index acf70d4e..79a6253c 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -286,13 +286,7 @@ class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): pass -@_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset") -class FixedScaleOffset(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}fixedscaleoffset" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): return 
replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] From 09997f51471bb1afbd1a4f7827b056341c2f9d00 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:30:10 -0400 Subject: [PATCH 12/22] Quantize --- numcodecs/tests/test_zarr3.py | 2 ++ numcodecs/zarr3.py | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 5dacf5b6..f6d8c7e6 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -281,6 +281,7 @@ def test_to_dict(): assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} +# TODO replace this explicit list of parametrizations by somehow importing all from numcodecs.zarr3 @pytest.mark.parametrize( "codec_cls", [ @@ -295,6 +296,7 @@ def test_to_dict(): numcodecs.zarr3.BitRound, numcodecs.zarr3.Delta, numcodecs.zarr3.FixedScaleOffset, + numcodecs.zarr3.Quantize, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 79a6253c..0b592150 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -298,10 +298,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: return self -@_add_docstring_wrapper("numcodecs.quantize.Quantize") -class Quantize(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}quantize" - +class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) From 6b57ca030aa201873887519e7f4b9f5c4a39bc2c Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:35:21 -0400 Subject: [PATCH 13/22] PackBits --- numcodecs/tests/test_zarr3.py | 1 + numcodecs/zarr3.py | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index f6d8c7e6..17461ed8 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -297,6 
+297,7 @@ def test_to_dict(): numcodecs.zarr3.Delta, numcodecs.zarr3.FixedScaleOffset, numcodecs.zarr3.Quantize, + numcodecs.zarr3.PackBits, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 0b592150..4c089eb1 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -308,13 +308,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: return self -@_add_docstring_wrapper("numcodecs.packbits.PackBits") -class PackBits(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}packbits" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace( chunk_spec, From 8907aa022927c2b847995315019a7ae8dc6f33e6 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:36:37 -0400 Subject: [PATCH 14/22] AsType --- numcodecs/tests/test_zarr3.py | 1 + numcodecs/zarr3.py | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 17461ed8..0de1c357 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -298,6 +298,7 @@ def test_to_dict(): numcodecs.zarr3.FixedScaleOffset, numcodecs.zarr3.Quantize, numcodecs.zarr3.PackBits, + numcodecs.zarr3.AsType, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 4c089eb1..3e2a0d5e 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -321,13 +321,7 @@ def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: raise ValueError(f"Packbits filter requires bool dtype. 
Got {dtype}.") -@_add_docstring_wrapper("numcodecs.astype.AsType") -class AsType(_NumcodecsArrayArrayCodec): - codec_name = f"{CODEC_PREFIX}astype" - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - +class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] From 51fbd8fa10efc0e5ff52d54cdb3d734ff84dd50d Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:56:30 -0400 Subject: [PATCH 15/22] redefine checksum codecs --- numcodecs/tests/test_zarr3.py | 5 +++++ numcodecs/zarr3.py | 39 +++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 0de1c357..e75db0ed 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -299,6 +299,11 @@ def test_to_dict(): numcodecs.zarr3.Quantize, numcodecs.zarr3.PackBits, numcodecs.zarr3.AsType, + numcodecs.zarr3.CRC32, + numcodecs.zarr3.CRC32C, + numcodecs.zarr3.Adler32, + numcodecs.zarr3.Fletcher32, + numcodecs.zarr3.JenkinsLookup3, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 3e2a0d5e..66318b80 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -84,8 +84,10 @@ def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): """To be used only when creating the actual public-facing codec class.""" super().__init_subclass__(**kwargs) if codec_name is not None: - cls_name = f"{CODEC_PREFIX}{codec_name}.{cls.__name__}" - cls.codec_name = f"{CODEC_PREFIX}{codec_name}" + namespace = codec_name + + cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" + cls.codec_name = f"{CODEC_PREFIX}{namespace}" cls.__doc__ = f""" See :class:`{cls_name}` for more details and parameters. 
""" @@ -332,15 +334,30 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: # bytes-to-bytes checksum codecs -CRC32 = _add_docstring(_make_checksum_codec("crc32", "CRC32"), "numcodecs.checksum32.CRC32") -CRC32C = _add_docstring(_make_checksum_codec("crc32c", "CRC32C"), "numcodecs.checksum32.CRC32C") -Adler32 = _add_docstring(_make_checksum_codec("adler32", "Adler32"), "numcodecs.checksum32.Adler32") -Fletcher32 = _add_docstring( - _make_checksum_codec("fletcher32", "Fletcher32"), "numcodecs.fletcher32.Fletcher32" -) -JenkinsLookup3 = _add_docstring( - _make_checksum_codec("jenkins_lookup3", "JenkinsLookup3"), "numcodecs.checksum32.JenkinsLookup3" -) +class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): + def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: + return input_byte_length + 4 # pragma: no cover + + +class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"): + pass + + +class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"): + pass + + +class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"): + pass + + +class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"): + pass + + +class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): + pass + # array-to-bytes codecs PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec") From 97f0ac91cc88a8b68f89381edd9c9a69161d7464 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 18:58:55 -0400 Subject: [PATCH 16/22] array to bytes codecs --- numcodecs/tests/test_zarr3.py | 2 ++ numcodecs/zarr3.py | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index e75db0ed..ddbf7ae4 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -304,6 +304,8 @@ def test_to_dict(): numcodecs.zarr3.Adler32, numcodecs.zarr3.Fletcher32, numcodecs.zarr3.JenkinsLookup3, + 
numcodecs.zarr3.PCodec, + numcodecs.zarr3.ZFPY, ], ) def test_codecs_pickleable(codec_cls): diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 66318b80..f77a16a5 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -360,8 +360,13 @@ class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): # array-to-bytes codecs -PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec") -ZFPY = _add_docstring(_make_array_bytes_codec("zfpy", "ZFPY"), "numcodecs.zfpy.ZFPY") +class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"): + pass + + +class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"): + pass + __all__ = [ "BZ2", From fa2e44853178aff74dc4d343086ab435f19dd988 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 19:03:46 -0400 Subject: [PATCH 17/22] remove todo --- numcodecs/tests/test_zarr3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index ddbf7ae4..99adc986 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -281,7 +281,6 @@ def test_to_dict(): assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} -# TODO replace this explicit list of parametrizations by somehow importing all from numcodecs.zarr3 @pytest.mark.parametrize( "codec_cls", [ From 78b82fca962a005c6d440ae7901bf670b2ae51b4 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 19:05:10 -0400 Subject: [PATCH 18/22] remove dynamic constructors --- numcodecs/zarr3.py | 47 ---------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index f77a16a5..458846aa 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -193,53 +193,6 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) return chunk_spec.prototype.buffer.from_bytes(out) -T = TypeVar("T", bound=_NumcodecsCodec) - - -def 
_add_docstring(cls: type[T], ref_class_name: str) -> type[T]: - cls.__doc__ = textwrap.dedent( - f""" - See :class:`{ref_class_name}` for more details and parameters. - """ - ) - return cls - - -def _add_docstring_wrapper(ref_class_name: str) -> partial: - return partial(_add_docstring, ref_class_name=ref_class_name) - - -def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _Codec(_NumcodecsArrayBytesCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - _Codec.__name__ = cls_name - return _Codec - - -def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: - # rename for class scope - _codec_name = CODEC_PREFIX + codec_name - - class _ChecksumCodec(_NumcodecsBytesBytesCodec): - codec_name = _codec_name - - def __init__(self, **codec_config: JSON) -> None: - super().__init__(**codec_config) - - def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: - return input_byte_length + 4 # pragma: no cover - - _ChecksumCodec.__name__ = cls_name - return _ChecksumCodec - - # bytes-to-bytes codecs class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): pass From 61ae42c0c4a7936aa19c3b88236e7c92b27ee992 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 19:10:03 -0400 Subject: [PATCH 19/22] release note --- docs/release.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index bda2f867..cd25fd93 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -25,6 +25,8 @@ Improvements * In ``vlen``, define and use ``const`` ``HEADER_LENGTH``. By :user:`John Kirkham <jakirkham>`, :issue:`723` +* All codecs are now pickleable. 
+ By :user:`Tom Nicholas <TomNicholas>`, :issue:`744` Fixes ~~~~~ From 6d04f9b7c8ad0ab2e4913eaed7de8d633a02139a Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 19:10:19 -0400 Subject: [PATCH 20/22] remove unneeded imports --- numcodecs/zarr3.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 458846aa..dac42ba6 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -28,10 +28,9 @@ import asyncio import math from dataclasses import dataclass, replace -from functools import cached_property, partial -from typing import Any, Self, TypeVar +from functools import cached_property +from typing import Any, Self from warnings import warn -import textwrap import numpy as np From ea8c06a1810f64bd0850480384d263541937f992 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Apr 2025 23:30:28 +0000 Subject: [PATCH 21/22] style: pre-commit fixes --- numcodecs/tests/test_zarr3.py | 3 ++- numcodecs/zarr3.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 99adc986..78336a94 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -3,10 +3,11 @@ import pickle from typing import TYPE_CHECKING -import numcodecs.bitround import numpy as np import pytest +import numcodecs.bitround + if TYPE_CHECKING: # pragma: no cover import zarr else: diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index dac42ba6..595fa9fd 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -223,9 +223,9 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: - if self.codec_config.get("elementsize", None) is None: - return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) - return self # 
pragma: no cover + if self.codec_config.get("elementsize", None) is None: + return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) + return self # pragma: no cover # array-to-array codecs ("filters") From 4c229b7fa105796a922fd4d56d21ed9bc5358f1c Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Apr 2025 19:32:31 -0400 Subject: [PATCH 22/22] remove unused type ignore --- numcodecs/tests/test_zarr3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 78336a94..8e2d1264 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -263,7 +263,7 @@ def test_delta_astype(store: StorePath): dtype=data.dtype, fill_value=0, filters=[ - numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] + numcodecs.zarr3.Delta(dtype="i8", astype="i2"), ], )