Skip to content

Make all codecs pickleable #745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Improvements

* In ``vlen``, define and use ``const`` ``HEADER_LENGTH``.
By :user:`John Kirkham <jakirkham>`, :issue:`723`
* All codecs are now pickleable.
By :user:`Tom Nicholas <TomNicholas>`, :issue:`744`

Fixes
~~~~~
Expand Down
41 changes: 40 additions & 1 deletion numcodecs/tests/test_zarr3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from __future__ import annotations

import pickle
from typing import TYPE_CHECKING

import numpy as np
import pytest

import numcodecs.bitround

Comment on lines +9 to +10
Copy link
Member Author

@TomNicholas TomNicholas Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember adding this import – maybe the linter added it?

if TYPE_CHECKING: # pragma: no cover
import zarr
else:
Expand Down Expand Up @@ -260,7 +263,7 @@ def test_delta_astype(store: StorePath):
dtype=data.dtype,
fill_value=0,
filters=[
numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type]
numcodecs.zarr3.Delta(dtype="i8", astype="i2"),
],
)

Expand All @@ -277,3 +280,39 @@ def test_repr():
def test_to_dict():
    """``to_dict`` reports the prefixed codec name and the supplied configuration."""
    expected = {"name": "numcodecs.lz4", "configuration": {"level": 5}}
    serialized = numcodecs.zarr3.LZ4(level=5).to_dict()
    assert serialized == expected


@pytest.mark.parametrize(
    "codec_cls",
    [
        numcodecs.zarr3.Blosc,
        numcodecs.zarr3.LZ4,
        numcodecs.zarr3.Zstd,
        numcodecs.zarr3.Zlib,
        numcodecs.zarr3.GZip,
        numcodecs.zarr3.BZ2,
        numcodecs.zarr3.LZMA,
        numcodecs.zarr3.Shuffle,
        numcodecs.zarr3.BitRound,
        numcodecs.zarr3.Delta,
        numcodecs.zarr3.FixedScaleOffset,
        numcodecs.zarr3.Quantize,
        numcodecs.zarr3.PackBits,
        numcodecs.zarr3.AsType,
        numcodecs.zarr3.CRC32,
        numcodecs.zarr3.CRC32C,
        numcodecs.zarr3.Adler32,
        numcodecs.zarr3.Fletcher32,
        numcodecs.zarr3.JenkinsLookup3,
        numcodecs.zarr3.PCodec,
        numcodecs.zarr3.ZFPY,
    ],
)
def test_codecs_pickleable(codec_cls):
    """A default-constructed codec must compare equal to its pickle round trip."""
    original = codec_cls()
    restored = pickle.loads(pickle.dumps(original))
    assert restored == original
192 changes: 68 additions & 124 deletions numcodecs/zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import asyncio
import math
from dataclasses import dataclass, replace
from functools import cached_property, partial
from typing import Any, Self, TypeVar
from functools import cached_property
from typing import Any, Self
from warnings import warn

import numpy as np
Expand Down Expand Up @@ -79,6 +79,18 @@ class _NumcodecsCodec(Metadata):
codec_name: str
codec_config: dict[str, JSON]

def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs):
    """Set ``codec_name`` and a generated docstring on a public codec class.

    To be used only when creating the actual public-facing codec class.
    Subclasses that do not pass ``codec_name`` are left untouched.

    Parameters
    ----------
    codec_name
        Un-prefixed codec identifier (for example ``"blosc"``). When given,
        ``cls.codec_name`` becomes ``CODEC_PREFIX + codec_name`` and
        ``cls.__doc__`` is replaced by a cross-reference to the wrapped class.
    **kwargs
        Forwarded unchanged to the parent ``__init_subclass__``.
    """
    super().__init_subclass__(**kwargs)
    if codec_name is None:
        return

    # The wrapped class normally lives in a module named after the codec, but
    # these checksum codecs are all defined in ``checksum32``. Without this
    # table the generated cross-reference would point at a missing module.
    doc_modules = {
        "crc32": "checksum32",
        "crc32c": "checksum32",
        "adler32": "checksum32",
        "jenkins_lookup3": "checksum32",
    }
    ref_module = doc_modules.get(codec_name, codec_name)
    ref_class_name = f"{CODEC_PREFIX}{ref_module}.{cls.__name__}"

    cls.codec_name = f"{CODEC_PREFIX}{codec_name}"
    cls.__doc__ = f"""
    See :class:`{ref_class_name}` for more details and parameters.
    """

def __init__(self, **codec_config: JSON) -> None:
if not self.codec_name:
raise ValueError(
Expand Down Expand Up @@ -180,128 +192,55 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec)
return chunk_spec.prototype.buffer.from_bytes(out)


T = TypeVar("T", bound=_NumcodecsCodec)


def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]:
cls.__doc__ = f"""
See :class:`{ref_class_name}` for more details and parameters.
"""
return cls


def _add_docstring_wrapper(ref_class_name: str) -> partial:
    """Return ``_add_docstring`` with ``ref_class_name`` pre-bound, usable as a class decorator."""
    decorator = partial(_add_docstring, ref_class_name=ref_class_name)
    return decorator


def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]:
    """Create a bytes-to-bytes codec class called ``cls_name`` for ``codec_name``.

    Parameters
    ----------
    codec_name
        Un-prefixed codec identifier; ``CODEC_PREFIX`` is prepended to form the
        generated class's ``codec_name`` attribute.
    cls_name
        Name given to the generated class. It should match the module-level
        name the returned class is bound to, otherwise pickle cannot locate it.

    Returns
    -------
    type[_NumcodecsBytesBytesCodec]
        The newly created subclass.
    """
    # rename for class scope
    _codec_name = CODEC_PREFIX + codec_name

    class _Codec(_NumcodecsBytesBytesCodec):
        codec_name = _codec_name

        def __init__(self, **codec_config: JSON) -> None:
            super().__init__(**codec_config)

    _Codec.__name__ = cls_name
    # pickle resolves a class through ``__module__`` + ``__qualname__``. Left
    # alone, the qualname would be "<factory>.<locals>._Codec", which cannot be
    # imported, so instances of the generated class would fail to pickle.
    _Codec.__qualname__ = cls_name
    return _Codec


def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]:
    """Create an array-to-array codec class called ``cls_name`` for ``codec_name``.

    Parameters
    ----------
    codec_name
        Un-prefixed codec identifier; ``CODEC_PREFIX`` is prepended to form the
        generated class's ``codec_name`` attribute.
    cls_name
        Name given to the generated class. It should match the module-level
        name the returned class is bound to, otherwise pickle cannot locate it.

    Returns
    -------
    type[_NumcodecsArrayArrayCodec]
        The newly created subclass.
    """
    # rename for class scope
    _codec_name = CODEC_PREFIX + codec_name

    class _Codec(_NumcodecsArrayArrayCodec):
        codec_name = _codec_name

        def __init__(self, **codec_config: JSON) -> None:
            super().__init__(**codec_config)

    _Codec.__name__ = cls_name
    # pickle resolves a class through ``__module__`` + ``__qualname__``. Left
    # alone, the qualname would be "<factory>.<locals>._Codec", which cannot be
    # imported, so instances of the generated class would fail to pickle.
    _Codec.__qualname__ = cls_name
    return _Codec


def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]:
# rename for class scope
_codec_name = CODEC_PREFIX + codec_name
# bytes-to-bytes codecs
class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"):
pass

class _Codec(_NumcodecsArrayBytesCodec):
codec_name = _codec_name

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)
class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"):
pass

_Codec.__name__ = cls_name
return _Codec

class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"):
pass

def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]:
# rename for class scope
_codec_name = CODEC_PREFIX + codec_name

class _ChecksumCodec(_NumcodecsBytesBytesCodec):
codec_name = _codec_name
class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"):
pass

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
return input_byte_length + 4 # pragma: no cover
class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"):
pass

_ChecksumCodec.__name__ = cls_name
return _ChecksumCodec

class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"):
pass

# bytes-to-bytes codecs
Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc")
LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4")
Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd")
Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib")
GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip")
BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2")
LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA")

class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"):
pass

@_add_docstring_wrapper("numcodecs.shuffle.Shuffle")
class Shuffle(_NumcodecsBytesBytesCodec):
codec_name = f"{CODEC_PREFIX}shuffle"

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"):
    # Bytes-to-bytes codec registered under the "shuffle" codec name.

    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle:
        """Fill in ``elementsize`` from the array dtype's item size when it is unset.

        Returns ``self`` unchanged when ``elementsize`` is already configured.
        """
        if self.codec_config.get("elementsize") is not None:
            return self  # pragma: no cover
        evolved_config = {**self.codec_config, "elementsize": array_spec.dtype.itemsize}
        return Shuffle(**evolved_config)


# array-to-array codecs ("filters")
@_add_docstring_wrapper("numcodecs.delta.Delta")
class Delta(_NumcodecsArrayArrayCodec):
codec_name = f"{CODEC_PREFIX}delta"

def __init__(self, **codec_config: dict[str, JSON]) -> None:
super().__init__(**codec_config)

class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"):
    # Array-to-array codec registered under the "delta" codec name.

    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
        """Return ``chunk_spec`` with its dtype replaced by the configured ``astype``.

        When ``astype`` is missing or falsy, ``chunk_spec`` is returned unchanged.
        """
        target_dtype = self.codec_config.get("astype")
        if not target_dtype:
            return chunk_spec
        return replace(chunk_spec, dtype=np.dtype(target_dtype))  # type: ignore[call-overload]


BitRound = _add_docstring(
_make_array_array_codec("bitround", "BitRound"), "numcodecs.bitround.BitRound"
)

# Array-to-array codec registered under the "bitround" codec name; it adds
# nothing beyond what _NumcodecsArrayArrayCodec provides.
class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"):
    pass

@_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset")
class FixedScaleOffset(_NumcodecsArrayArrayCodec):
codec_name = f"{CODEC_PREFIX}fixedscaleoffset"

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
if astype := self.codec_config.get("astype"):
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload]
Expand All @@ -313,10 +252,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
return self


@_add_docstring_wrapper("numcodecs.quantize.Quantize")
class Quantize(_NumcodecsArrayArrayCodec):
codec_name = f"{CODEC_PREFIX}quantize"

class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"):
def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

Expand All @@ -326,13 +262,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize:
return self


@_add_docstring_wrapper("numcodecs.packbits.PackBits")
class PackBits(_NumcodecsArrayArrayCodec):
codec_name = f"{CODEC_PREFIX}packbits"

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
return replace(
chunk_spec,
Expand All @@ -345,13 +275,7 @@ def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")


@_add_docstring_wrapper("numcodecs.astype.AsType")
class AsType(_NumcodecsArrayArrayCodec):
codec_name = f"{CODEC_PREFIX}astype"

def __init__(self, **codec_config: JSON) -> None:
super().__init__(**codec_config)

class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"):
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type]

Expand All @@ -362,19 +286,39 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType:


# bytes-to-bytes checksum codecs
CRC32 = _add_docstring(_make_checksum_codec("crc32", "CRC32"), "numcodecs.checksum32.CRC32")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These checksum codecs break the pattern of the ones above: their docstring references use "numcodecs.checksum32.<name>" rather than "numcodecs.<lowercasename>.<name>". I ignored that difference, and it doesn't seem to affect anything at all, which is suspicious.

CRC32C = _add_docstring(_make_checksum_codec("crc32c", "CRC32C"), "numcodecs.checksum32.CRC32C")
Adler32 = _add_docstring(_make_checksum_codec("adler32", "Adler32"), "numcodecs.checksum32.Adler32")
Fletcher32 = _add_docstring(
_make_checksum_codec("fletcher32", "Fletcher32"), "numcodecs.fletcher32.Fletcher32"
)
JenkinsLookup3 = _add_docstring(
_make_checksum_codec("jenkins_lookup3", "JenkinsLookup3"), "numcodecs.checksum32.JenkinsLookup3"
)
class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec):
    """Shared base for the checksum codecs; reports an encoded size four bytes above the input."""

    def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
        """Return ``input_byte_length`` plus four bytes; ``chunk_spec`` is not consulted."""
        # NOTE(review): the four extra bytes are presumably the 32-bit checksum - confirm.
        extra_nbytes = 4
        return input_byte_length + extra_nbytes  # pragma: no cover


# Checksum codec registered under the "crc32" codec name; all behaviour is
# inherited from _NumcodecsChecksumCodec.
class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):
    pass


# Checksum codec registered under the "crc32c" codec name; all behaviour is
# inherited from _NumcodecsChecksumCodec.
class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"):
    pass


# Checksum codec registered under the "adler32" codec name; all behaviour is
# inherited from _NumcodecsChecksumCodec.
class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"):
    pass


# Checksum codec registered under the "fletcher32" codec name; all behaviour
# is inherited from _NumcodecsChecksumCodec.
class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"):
    pass


# Checksum codec registered under the "jenkins_lookup3" codec name; all
# behaviour is inherited from _NumcodecsChecksumCodec.
class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"):
    pass


# array-to-bytes codecs
PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec")
ZFPY = _add_docstring(_make_array_bytes_codec("zfpy", "ZFPY"), "numcodecs.zfpy.ZFPY")
# Array-to-bytes codec registered under the "pcodec" codec name; all
# behaviour is inherited from _NumcodecsArrayBytesCodec.
class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"):
    pass


# Array-to-bytes codec registered under the "zfpy" codec name; all behaviour
# is inherited from _NumcodecsArrayBytesCodec.
class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"):
    pass


__all__ = [
"BZ2",
Expand Down
Loading