-
Notifications
You must be signed in to change notification settings - Fork 97
Make all codecs pickleable #745
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
TomNicholas
wants to merge
22
commits into
zarr-developers:main
Choose a base branch
from
TomNicholas:pickleable_all_codecs
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+110
−125
Open
Changes from all commits
Commits
Show all changes
22 commits
Select commit
Hold shift + click to select a range
80b3a42
make Zlib codec pickleable
TomNicholas 83826c0
add test
TomNicholas eba4a8f
show __init_subclass__ can work for Zlib
TomNicholas 775608c
refactor the BytesBytes Codecs to use __init_subclass__
TomNicholas 5304a56
remove snake_case function
TomNicholas 5ef6a47
Chuck's suggestions
TomNicholas 037cda4
redefine Bitround
TomNicholas 7e624f4
remove debugging prints from test
TomNicholas 6e33e10
redefine Shuffle
TomNicholas b1ede89
redefine Delta
TomNicholas 0a6c2c7
redefine FixedScaleOffset
TomNicholas 09997f5
Quantize
TomNicholas 6b57ca0
PackBits
TomNicholas 8907aa0
AsType
TomNicholas 51fbd8f
redefine checksum codecs
TomNicholas 97f0ac9
array to bytes codecs
TomNicholas fa2e448
remove todo
TomNicholas 78b82fc
remove dynamic constructors
TomNicholas 61ae42c
release note
TomNicholas 6d04f9b
remove unneeded imports
TomNicholas ea8c06a
style: pre-commit fixes
pre-commit-ci[bot] 4c229b7
remove unused type ignore
TomNicholas File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,8 +28,8 @@ | |
import asyncio | ||
import math | ||
from dataclasses import dataclass, replace | ||
from functools import cached_property, partial | ||
from typing import Any, Self, TypeVar | ||
from functools import cached_property | ||
from typing import Any, Self | ||
from warnings import warn | ||
|
||
import numpy as np | ||
|
@@ -79,6 +79,18 @@ class _NumcodecsCodec(Metadata): | |
codec_name: str | ||
codec_config: dict[str, JSON] | ||
|
||
def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs): | ||
"""To be used only when creating the actual public-facing codec class.""" | ||
super().__init_subclass__(**kwargs) | ||
if codec_name is not None: | ||
namespace = codec_name | ||
|
||
cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" | ||
cls.codec_name = f"{CODEC_PREFIX}{namespace}" | ||
cls.__doc__ = f""" | ||
See :class:`{cls_name}` for more details and parameters. | ||
""" | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
if not self.codec_name: | ||
raise ValueError( | ||
|
@@ -180,128 +192,55 @@ async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) | |
return chunk_spec.prototype.buffer.from_bytes(out) | ||
|
||
|
||
T = TypeVar("T", bound=_NumcodecsCodec) | ||
|
||
|
||
def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]: | ||
cls.__doc__ = f""" | ||
See :class:`{ref_class_name}` for more details and parameters. | ||
""" | ||
return cls | ||
|
||
|
||
def _add_docstring_wrapper(ref_class_name: str) -> partial: | ||
return partial(_add_docstring, ref_class_name=ref_class_name) | ||
|
||
|
||
def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: | ||
# rename for class scope | ||
_codec_name = CODEC_PREFIX + codec_name | ||
|
||
class _Codec(_NumcodecsBytesBytesCodec): | ||
codec_name = _codec_name | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
_Codec.__name__ = cls_name | ||
return _Codec | ||
|
||
|
||
def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]: | ||
# rename for class scope | ||
_codec_name = CODEC_PREFIX + codec_name | ||
|
||
class _Codec(_NumcodecsArrayArrayCodec): | ||
codec_name = _codec_name | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
_Codec.__name__ = cls_name | ||
return _Codec | ||
|
||
|
||
def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]: | ||
# rename for class scope | ||
_codec_name = CODEC_PREFIX + codec_name | ||
# bytes-to-bytes codecs | ||
class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): | ||
pass | ||
|
||
class _Codec(_NumcodecsArrayBytesCodec): | ||
codec_name = _codec_name | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"): | ||
pass | ||
|
||
_Codec.__name__ = cls_name | ||
return _Codec | ||
|
||
class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"): | ||
pass | ||
|
||
def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]: | ||
# rename for class scope | ||
_codec_name = CODEC_PREFIX + codec_name | ||
|
||
class _ChecksumCodec(_NumcodecsBytesBytesCodec): | ||
codec_name = _codec_name | ||
class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"): | ||
pass | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: | ||
return input_byte_length + 4 # pragma: no cover | ||
class GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"): | ||
pass | ||
|
||
_ChecksumCodec.__name__ = cls_name | ||
return _ChecksumCodec | ||
|
||
class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"): | ||
pass | ||
|
||
# bytes-to-bytes codecs | ||
Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc") | ||
LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4") | ||
Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd") | ||
Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib") | ||
GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip") | ||
BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2") | ||
LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA") | ||
|
||
class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): | ||
pass | ||
|
||
@_add_docstring_wrapper("numcodecs.shuffle.Shuffle") | ||
class Shuffle(_NumcodecsBytesBytesCodec): | ||
codec_name = f"{CODEC_PREFIX}shuffle" | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): | ||
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: | ||
if self.codec_config.get("elementsize", None) is None: | ||
return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) | ||
return self # pragma: no cover | ||
|
||
|
||
# array-to-array codecs ("filters") | ||
@_add_docstring_wrapper("numcodecs.delta.Delta") | ||
class Delta(_NumcodecsArrayArrayCodec): | ||
codec_name = f"{CODEC_PREFIX}delta" | ||
|
||
def __init__(self, **codec_config: dict[str, JSON]) -> None: | ||
super().__init__(**codec_config) | ||
|
||
class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): | ||
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: | ||
if astype := self.codec_config.get("astype"): | ||
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] | ||
return chunk_spec | ||
|
||
|
||
BitRound = _add_docstring( | ||
_make_array_array_codec("bitround", "BitRound"), "numcodecs.bitround.BitRound" | ||
) | ||
|
||
class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): | ||
pass | ||
|
||
@_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset") | ||
class FixedScaleOffset(_NumcodecsArrayArrayCodec): | ||
codec_name = f"{CODEC_PREFIX}fixedscaleoffset" | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): | ||
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: | ||
if astype := self.codec_config.get("astype"): | ||
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] | ||
|
@@ -313,10 +252,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: | |
return self | ||
|
||
|
||
@_add_docstring_wrapper("numcodecs.quantize.Quantize") | ||
class Quantize(_NumcodecsArrayArrayCodec): | ||
codec_name = f"{CODEC_PREFIX}quantize" | ||
|
||
class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"): | ||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
|
@@ -326,13 +262,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: | |
return self | ||
|
||
|
||
@_add_docstring_wrapper("numcodecs.packbits.PackBits") | ||
class PackBits(_NumcodecsArrayArrayCodec): | ||
codec_name = f"{CODEC_PREFIX}packbits" | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"): | ||
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: | ||
return replace( | ||
chunk_spec, | ||
|
@@ -345,13 +275,7 @@ def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: | |
raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.") | ||
|
||
|
||
@_add_docstring_wrapper("numcodecs.astype.AsType") | ||
class AsType(_NumcodecsArrayArrayCodec): | ||
codec_name = f"{CODEC_PREFIX}astype" | ||
|
||
def __init__(self, **codec_config: JSON) -> None: | ||
super().__init__(**codec_config) | ||
|
||
class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): | ||
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: | ||
return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] | ||
|
||
|
@@ -362,19 +286,39 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: | |
|
||
|
||
# bytes-to-bytes checksum codecs | ||
CRC32 = _add_docstring(_make_checksum_codec("crc32", "CRC32"), "numcodecs.checksum32.CRC32") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These codecs break the pattern of the ones above because they have |
||
CRC32C = _add_docstring(_make_checksum_codec("crc32c", "CRC32C"), "numcodecs.checksum32.CRC32C") | ||
Adler32 = _add_docstring(_make_checksum_codec("adler32", "Adler32"), "numcodecs.checksum32.Adler32") | ||
Fletcher32 = _add_docstring( | ||
_make_checksum_codec("fletcher32", "Fletcher32"), "numcodecs.fletcher32.Fletcher32" | ||
) | ||
JenkinsLookup3 = _add_docstring( | ||
_make_checksum_codec("jenkins_lookup3", "JenkinsLookup3"), "numcodecs.checksum32.JenkinsLookup3" | ||
) | ||
class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): | ||
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: | ||
return input_byte_length + 4 # pragma: no cover | ||
|
||
|
||
class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"): | ||
pass | ||
|
||
|
||
class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"): | ||
pass | ||
|
||
|
||
class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"): | ||
pass | ||
|
||
|
||
class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"): | ||
pass | ||
|
||
|
||
class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): | ||
pass | ||
|
||
|
||
# array-to-bytes codecs | ||
PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec") | ||
ZFPY = _add_docstring(_make_array_bytes_codec("zfpy", "ZFPY"), "numcodecs.zfpy.ZFPY") | ||
class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"): | ||
pass | ||
|
||
|
||
class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"): | ||
pass | ||
|
||
|
||
__all__ = [ | ||
"BZ2", | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't remember adding this - maybe the linter did??