Skip to content

Commit 0710fbf

Browse files
LucaMarconatoclaude
andcommitted
Fix zarr v3 compression: use native codecs and bump min deps
Use "compressors" (plural) with native zarr v3 codec objects (ZstdCodec, BloscCodec) in storage_options, which ome-zarr-py >= 0.16.0 + dask >= 2026.3.0 correctly forwards to zarr_array_kwargs. Bump minimum deps accordingly and fix test assertions for lz4 (.clevel) vs zstd (.level) and the s0 array path. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent fac4cbc commit 0710fbf

3 files changed

Lines changed: 59 additions & 30 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,16 @@ dependencies = [
2626
"annsel>=0.1.2",
2727
"click",
2828
"dask-image",
29-
"dask>=2025.12.0,<2026.1.2",
30-
"distributed<2026.1.2",
29+
"dask>=2026.3.0",
30+
"distributed>=2026.3.0",
3131
"datashader",
3232
"fsspec[s3,http]",
3333
"geopandas>=0.14",
3434
"multiscale_spatial_image==2.0.3",
3535
"networkx",
3636
"numba>=0.55.0",
3737
"numpy",
38-
"ome_zarr>=0.14.0",
38+
"ome_zarr>=0.16.0",
3939
"pandas",
4040
"pooch",
4141
"pyarrow",

src/spatialdata/_io/io_raster.py

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,21 @@ def _write_raster(
342342
group.attrs[ATTRS_KEY] = attrs
343343

344344

345+
def _build_v3_codec(
346+
compression: Literal["lz4", "zstd"],
347+
compression_level: int,
348+
) -> Any:
349+
"""Return the appropriate zarr v3 codec for the given compression type and level."""
350+
if compression == "zstd":
351+
from zarr.codecs import ZstdCodec
352+
353+
return ZstdCodec(level=compression_level)
354+
# lz4: use the native zarr v3 BloscCodec
355+
from zarr.codecs import BloscCodec
356+
357+
return BloscCodec(cname="lz4", clevel=compression_level)
358+
359+
345360
def _apply_compression(
346361
storage_options: JSONDict | list[JSONDict],
347362
compressor: dict[Literal["lz4", "zstd"], int] | None,
@@ -362,32 +377,47 @@ def _apply_compression(
362377
-------
363378
Updated storage options with compression settings
364379
"""
365-
# For zarr disk format v2, use numcodecs.Blosc
366-
# For zarr disk format v3, use zarr.codecs.Blosc
367-
from numcodecs import Blosc as BloscV2
368-
from zarr.codecs import Blosc as BloscV3
369-
370380
if not compressor:
371381
return storage_options
372382

373383
((compression, compression_level),) = compressor.items()
374384

375-
assert BloscV2.SHUFFLE == 1
376-
blosc_v2 = BloscV2(cname=compression, clevel=compression_level, shuffle=1)
377-
blosc_v3 = BloscV3(cname=compression, clevel=compression_level, shuffle=1)
385+
if zarr_format == 2:
386+
from numcodecs import Blosc as BloscV2
378387

379-
def _update_dict(d: dict[str, Any]) -> None:
380-
if zarr_format == 2:
381-
d["compressor"] = blosc_v2
382-
elif zarr_format == 3:
383-
d["zarr_array_kwargs"] = {"compressors": [blosc_v3]}
388+
assert BloscV2.SHUFFLE == 1
389+
codec_v2 = BloscV2(cname=compression, clevel=compression_level, shuffle=1)
384390

385-
if isinstance(storage_options, dict):
386-
_update_dict(d=storage_options)
391+
def _update_dict(d: dict[str, Any]) -> None:
392+
d["compressor"] = codec_v2
393+
394+
if isinstance(storage_options, dict):
395+
_update_dict(d=storage_options)
396+
elif isinstance(storage_options, list):
397+
for option in storage_options:
398+
_update_dict(d=option)
399+
elif storage_options is None:
400+
return {"compressor": codec_v2}
401+
else:
402+
raise ValueError(f"storage_options must be a dict or list, not {type(storage_options)}")
387403
else:
388-
assert isinstance(storage_options, list)
389-
for option in storage_options:
390-
_update_dict(d=option)
404+
# zarr v3: use native codec objects via the "compressors" (plural) key.
405+
# see https://github.com/ome/ome-zarr-py/blob/v0.16.0/ome_zarr/writer.py#L754
406+
# ome-zarr-py ≥ 0.16.0 with dask ≥ 2026.3.0 forwards this key to zarr_array_kwargs.
407+
codec_v3 = _build_v3_codec(compression, compression_level)
408+
409+
def _update_dict_v3(d: dict[str, Any]) -> None:
410+
d["compressors"] = [codec_v3]
411+
412+
if isinstance(storage_options, dict):
413+
_update_dict_v3(d=storage_options)
414+
elif isinstance(storage_options, list):
415+
for option in storage_options:
416+
_update_dict_v3(d=option)
417+
elif storage_options is None:
418+
return {"compressors": [codec_v3]}
419+
else:
420+
raise ValueError(f"storage_options must be a dict or list, not {type(storage_options)}")
391421

392422
return storage_options
393423

tests/io/test_readwrite.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,9 @@ def test_compression_roundtrip(
265265
# sourcery skip: no-loop-in-tests
266266
for element in ["image2d", "image2d_multiscale", "labels2d", "labels2d_multiscale"]:
267267
element_type = "images" if element.startswith("image") else "labels"
268-
arr = zarr.open_group(tmpdir / element_type, mode="r")[element]["0"]
268+
arr = zarr.open_group(tmpdir / element_type, mode="r")[element]["s0"]
269269
compressor = arr.compressors[0]
270270

271-
# TODO: all these tests fail because the compression arguments are not passed to Dask
272271
if sdata_container_format.zarr_format == 2:
273272
assert compressor.cname == "zstd"
274273
assert compressor.clevel == 8
@@ -295,23 +294,23 @@ def test_write_element_compression(
295294
sdata["element"] = full_sdata[element[1]]
296295
sdata.write_element("element", compressor=compressor, sdata_formats=sdata_container_format)
297296

298-
arr = zarr.open_group(tmpdir / element[0], mode="r")["element"]["0"]
297+
arr = zarr.open_group(tmpdir / element[0], mode="r")["element"]["s0"]
299298
compression = arr.compressors[0]
300299

301-
# TODO: all these tests fail because the compression arguments are not passed to Dask
302300
if sdata_container_format.zarr_format == 2:
303301
assert compression.cname == list(compressor.keys())[0]
304302
assert compression.clevel == list(compressor.values())[0]
305303
elif sdata_container_format.zarr_format == 3:
306-
from zarr.codecs import ZstdCodec
304+
from zarr.codecs import BloscCodec, ZstdCodec
307305

308306
compressor_name = list(compressor.keys())[0]
307+
compressor_level = list(compressor.values())[0]
309308
if compressor_name == "zstd":
310309
assert isinstance(compression, ZstdCodec)
311-
# TODO: fix
312-
# elif compressor_name == 'lz4':
313-
# assert isinstance(compression, ???)
314-
assert compression.level == list(compressor.values())[0]
310+
assert compression.level == compressor_level
311+
elif compressor_name == "lz4":
312+
assert isinstance(compression, BloscCodec)
313+
assert compression.clevel == compressor_level
315314

316315
def test_incremental_io_list_of_elements(
317316
self,

0 commit comments

Comments
 (0)