Skip to content

Make all codecs pickleable #745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions numcodecs/tests/test_zarr3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import pickle
from typing import TYPE_CHECKING

import numpy as np
Expand Down Expand Up @@ -277,3 +278,31 @@ def test_repr():
def test_to_dict():
    """Check that ``to_dict`` on an LZ4 codec reports its name and configuration."""
    lz4_codec = numcodecs.zarr3.LZ4(level=5)
    serialized = lz4_codec.to_dict()
    wanted = {"name": "numcodecs.lz4", "configuration": {"level": 5}}
    assert serialized == wanted


@pytest.mark.parametrize(
    "codec_cls",
    [
        numcodecs.zarr3.Blosc,
        numcodecs.zarr3.LZ4,
        numcodecs.zarr3.Zstd,
        numcodecs.zarr3.Zlib,
        numcodecs.zarr3.GZip,
        numcodecs.zarr3.BZ2,
        numcodecs.zarr3.LZMA,
        numcodecs.zarr3.Shuffle,
    ],
)
def test_codecs_pickleable(codec_cls):
    """Check that a default-constructed codec equals itself after a pickle round trip."""
    codec = codec_cls()

    # Serialize and immediately restore; the restored object must compare
    # equal to the instance it was created from.
    restored = pickle.loads(pickle.dumps(codec))
    assert restored == codec

    # Diagnostic output: the instance repr, its registered name and the
    # docstring attached to its class.
    print(codec)
    print(codec.codec_name)
    print(codec.__doc__)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for numcodecs.zarr3.Blosc this returns

Blosc(codec_name='numcodecs.Blosc', codec_config={})
numcodecs.Blosc

            See :class:`numcodecs.blosc.Blosc` for more details and parameters.
            

which all seems correct to me?

#assert False
67 changes: 45 additions & 22 deletions numcodecs/zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from functools import cached_property, partial
from typing import Any, Self, TypeVar
from warnings import warn
import textwrap

import numpy as np

Expand Down Expand Up @@ -74,11 +75,26 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
return {"id": id, **parsed_configuration}


def snake_case(codec_name: str) -> str:
    """Return ``codec_name`` converted to lower case.

    Only lower-casing is applied; no underscores are inserted between words.
    """
    # TODO the Jenkins codec is a special case because it inserts an _
    lowered = codec_name.lower()
    return lowered


@dataclass(frozen=True)
class _NumcodecsCodec(Metadata):
codec_name: str
codec_config: dict[str, JSON]

def __init_subclass__(
    cls, *, namespace: str | None = None, codec_name: str | None = None, **kwargs
):
    """To be used only when creating the actual public-facing codec class.

    Parameters
    ----------
    namespace
        Name of the module (below ``CODEC_PREFIX``) that holds the referenced
        class; used only to build the cross-reference in the docstring.
    codec_name
        Class name of the referenced codec, e.g. ``"LZ4"``.
    **kwargs
        Forwarded unchanged to the parent ``__init_subclass__``.

    When either ``namespace`` or ``codec_name`` is omitted the subclass is
    left untouched, so intermediate base classes are not affected.
    """
    super().__init_subclass__(**kwargs)
    if namespace is not None and codec_name is not None:
        cls_name = f"{CODEC_PREFIX}{namespace}.{codec_name}"
        # The registered name must be lower case (e.g. "numcodecs.lz4"), which
        # is what the factory helpers in this module build from their
        # lower-case ``codec_name`` argument. Using the CamelCase class name
        # directly would yield "numcodecs.LZ4" instead.
        cls.codec_name = f"{CODEC_PREFIX}{snake_case(codec_name)}"
        # Dedent so the generated docstring does not depend on how deeply this
        # method is indented in the source file.
        cls.__doc__ = textwrap.dedent(
            f"""
            See :class:`{cls_name}` for more details and parameters.
            """
        )

def __init__(self, **codec_config: JSON) -> None:
if not self.codec_name:
raise ValueError(
Expand Down Expand Up @@ -184,30 +200,18 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec)


def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All this can be deleted once the changes in this PR are applied to all the other codecs

cls.__doc__ = f"""
cls.__doc__ = textwrap.dedent(
f"""
See :class:`{ref_class_name}` for more details and parameters.
"""
)
return cls


def _add_docstring_wrapper(ref_class_name: str) -> partial:
    """Return ``_add_docstring`` with ``ref_class_name`` already bound.

    The returned partial takes the remaining ``cls`` argument, so it can be
    applied as a class decorator.
    """
    decorator = partial(_add_docstring, ref_class_name=ref_class_name)
    return decorator


def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]:
    """Create a bytes-to-bytes codec class for the given numcodecs codec.

    Parameters
    ----------
    codec_name
        Codec identifier; it is appended to ``CODEC_PREFIX`` to form the
        ``codec_name`` class attribute.
    cls_name
        Name given to the generated class.

    Returns
    -------
    A new subclass of ``_NumcodecsBytesBytesCodec``.
    """
    # rename for class scope
    _codec_name = CODEC_PREFIX + codec_name

    class _Codec(_NumcodecsBytesBytesCodec):
        codec_name = _codec_name

        def __init__(self, **codec_config: JSON) -> None:
            super().__init__(**codec_config)

    _Codec.__name__ = cls_name
    # pickle locates a class through ``__module__`` and ``__qualname__``.
    # Left alone, the qualified name is
    # "_make_bytes_bytes_codec.<locals>._Codec", which cannot be resolved, so
    # instances fail to pickle. Pointing it at ``cls_name`` works provided the
    # caller binds the returned class to that name at module level.
    _Codec.__qualname__ = cls_name
    return _Codec


def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]:
# rename for class scope
_codec_name = CODEC_PREFIX + codec_name
Expand Down Expand Up @@ -254,13 +258,32 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) ->


# bytes-to-bytes codecs
# Each name is built by `_make_bytes_bytes_codec(<codec name>, <class name>)`
# and then passed through `_add_docstring` together with the dotted path of
# the numcodecs class that the generated docstring should point to.
# NOTE(review): the same names are also defined by `class` statements further
# down in this file - confirm which set of definitions is meant to remain.
Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc")
LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4")
Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd")
Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib")
GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip")
BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2")
LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA")
class Blosc(
    _NumcodecsBytesBytesCodec,
    namespace="blosc",
    codec_name="Blosc",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class LZ4(
    _NumcodecsBytesBytesCodec,
    namespace="lz4",
    codec_name="LZ4",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class Zstd(
    _NumcodecsBytesBytesCodec,
    namespace="zstd",
    codec_name="Zstd",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class Zlib(
    _NumcodecsBytesBytesCodec,
    namespace="zlib",
    codec_name="Zlib",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class GZip(
    _NumcodecsBytesBytesCodec,
    namespace="gzip",
    codec_name="GZip",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class BZ2(
    _NumcodecsBytesBytesCodec,
    namespace="bz2",
    codec_name="BZ2",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


class LZMA(
    _NumcodecsBytesBytesCodec,
    namespace="lzma",
    codec_name="LZMA",
):
    # Intentionally empty: `namespace` and `codec_name` are class keywords,
    # presumably consumed by `_NumcodecsCodec.__init_subclass__` to set the
    # codec name and docstring (verify the base-class chain).
    pass


@_add_docstring_wrapper("numcodecs.shuffle.Shuffle")
Expand Down
Loading