Skip to content

Commit f73a150

Browse files
ilan-gold meeseeksmachine
authored and committed
Backport PR scverse#2051: fix: allow writing to v3/2 store when setting is v2/3 if possible
1 parent 05ffe86 commit f73a150

8 files changed

Lines changed: 30 additions & 31 deletions

File tree

docs/tutorials/zarr-v3.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# zarr-v3 Guide/Roadmap
22

3-
`anndata` now uses the much improved {mod}`zarr` v3 package and also allows writing of datasets in the v3 format via {attr}`anndata.settings.zarr_write_format`, with the exception of structured arrays.
3+
`anndata` now uses the much improved {mod}`zarr` v3 package and also allows writing of datasets in the v3 format by setting {attr}`anndata.settings.zarr_write_format` before calling {func}`anndata.io.write_zarr` or {meth}`anndata.AnnData.write_zarr`, with the exception of structured arrays.
44
Users should notice a significant performance improvement, especially for cloud data, but also likely for local data as well.
55
Here is a quick guide on some of our learnings so far:
66

@@ -48,7 +48,7 @@ import anndata as ad
4848
from collections.abc import Mapping
4949
from typing import Any
5050

51-
ad.settings.zarr_write_format = 3 # Absolutely crucial! Sharding is only for the v3 file format!
51+
g = zarr.open_group(orig_path, mode="a", use_consolidated=False, zarr_version=3) # zarr_version 3 is default but note that sharding only works with v3!
5252

5353
def write_sharded(group: zarr.Group, adata: ad.AnnData):
5454
def callback(

src/anndata/_io/specs/methods.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ def write_vlen_string_array_zarr(
630630
dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
631631
dtype = VariableLengthUTF8()
632632
filters, fill_value = None, None
633-
if ad.settings.zarr_write_format == 2:
633+
if f.metadata.zarr_format == 2:
634634
filters, fill_value = [VLenUTF8()], ""
635635
f.create_array(
636636
k,
@@ -1283,7 +1283,7 @@ def write_scalar_zarr(
12831283
from numcodecs import VLenUTF8
12841284
from zarr.core.dtype import VariableLengthUTF8
12851285

1286-
match ad.settings.zarr_write_format, value:
1286+
match f.metadata.zarr_format, value:
12871287
case 2, str():
12881288
filters, dtype, fill_value = [VLenUTF8()], VariableLengthUTF8(), ""
12891289
case 3, str():

src/anndata/_settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ def validate_zarr_write_format(format: int):
447447
settings.register(
448448
"zarr_write_format",
449449
default_value=2,
450-
description="Which version of zarr to write to.",
450+
description="Which version of zarr to write to when anndata must internally open a write-able zarr group.",
451451
validate=validate_zarr_write_format,
452452
get_from_env=lambda name, default: check_and_get_environ_var(
453453
f"ANNDATA_{name.upper()}",

src/anndata/compat/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,12 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray:
286286
"""\
287287
Convert variable length strings to fixed length.
288288
289-
Currently a workaround for
290-
https://github.com/zarr-developers/zarr-python/pull/422
289+
Formerly a workaround for
290+
https://github.com/zarr-developers/zarr-python/pull/422,
291+
resolved in https://github.com/zarr-developers/zarr-python/pull/813.
292+
293+
But if we didn't do this conversion, we would have to use a special codec in v2
294+
for objects and v3 doesn't support objects at all. So we leave this function as-is.
291295
"""
292296
new_dtype = []
293297
for dt_name, (dt_type, dt_offset) in value.dtype.fields.items():

tests/test_backed_sparse.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,11 +513,14 @@ def test_data_access(
513513
data = f["X/data"][...]
514514
del f["X/data"]
515515
# chunk one at a time to count properly
516+
kwargs = {}
517+
if not is_zarr_v2():
518+
kwargs["zarr_format"] = f.metadata.zarr_format
516519
zarr.array(
517520
data,
518521
store=path / "X" / "data",
519522
chunks=(1,),
520-
zarr_format=ad.settings.zarr_write_format,
523+
**kwargs,
521524
)
522525
store = AccessTrackingStore(path)
523526
store.initialize_key_trackers(["X/data"])

tests/test_io_elementwise.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -206,14 +206,6 @@ def create_sparse_store(
206206
],
207207
)
208208
def test_io_spec(store, value, encoding_type):
209-
# zarr v3 can't write recarray
210-
# https://github.com/zarr-developers/zarr-python/issues/2134
211-
if (
212-
ad.settings.zarr_write_format == 3
213-
and encoding_type == "anndata"
214-
and "O_recarray" in value.uns
215-
):
216-
del value.uns["O_recarray"]
217209
with ad.settings.override(allow_write_nullable_strings=True):
218210
key = f"key_for_{encoding_type}"
219211
write_elem(store, key, value, dataset_kwargs={})
@@ -564,10 +556,6 @@ def test_write_to_root(store, value):
564556
"""
565557
Test that elements which are written as groups can be written to the root group.
566558
"""
567-
# zarr v3 can't write recarray
568-
# https://github.com/zarr-developers/zarr-python/issues/2134
569-
if ad.settings.zarr_write_format == 3 and isinstance(value, ad.AnnData):
570-
del value.uns["O_recarray"]
571559
write_elem(store, "/", value)
572560
# See: https://github.com/zarr-developers/zarr-python/issues/2716
573561
if isinstance(store, ZarrGroup) and not is_zarr_v2():

tests/test_io_partial.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import zarr
1111
from scipy.sparse import csr_matrix
1212

13-
import anndata
1413
from anndata import AnnData
1514
from anndata._io.specs.registry import read_elem_partial
1615
from anndata.io import read_elem, write_h5ad, write_zarr
@@ -51,11 +50,6 @@ def test_read_partial_adata(tmp_path, diskfmt):
5150
import scanpy as sc
5251

5352
adata = sc.datasets.pbmc68k_reduced()
54-
# zarr v3 can't write recarray
55-
# https://github.com/zarr-developers/zarr-python/issues/2134
56-
if anndata.settings.zarr_write_format == 3 and isinstance(adata, AnnData):
57-
del adata.uns["rank_genes_groups"]["scores"]
58-
del adata.uns["rank_genes_groups"]["names"]
5953

6054
path = Path(tmp_path) / ("test_rp." + diskfmt)
6155

tests/test_readwrite.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -843,11 +843,6 @@ def test_scanpy_pbmc68k(tmp_path, diskfmt, roundtrip, diskfmt2):
843843
with warnings.catch_warnings():
844844
warnings.simplefilter("ignore", ad.OldFormatWarning)
845845
pbmc = sc.datasets.pbmc68k_reduced()
846-
# zarr v3 can't write recarray
847-
# https://github.com/zarr-developers/zarr-python/issues/2134
848-
if ad.settings.zarr_write_format == 3:
849-
del pbmc.uns["rank_genes_groups"]["names"]
850-
del pbmc.uns["rank_genes_groups"]["scores"]
851846

852847
from_disk1 = roundtrip(pbmc, filepth1) # Do we read okay
853848
from_disk2 = roundtrip2(from_disk1, filepth2) # Can we round trip
@@ -1010,3 +1005,18 @@ def test_write_elem_consolidated(tmp_path: Path):
10101005
ValueError, match="Cannot overwrite/edit a store with consolidated metadata"
10111006
):
10121007
ad.io.write_elem(g["obs"], "foo", np.arange(10))
1008+
1009+
1010+
@pytest.mark.zarr_io
1011+
@pytest.mark.skipif(is_zarr_v2(), reason="zarr v3 package test")
1012+
def test_write_elem_version_mismatch(tmp_path: Path):
1013+
zarr_path = tmp_path / "foo.zarr"
1014+
adata = ad.AnnData(np.ones((10, 10)))
1015+
g = zarr.open_group(
1016+
zarr_path,
1017+
mode="w",
1018+
zarr_format=2 if ad.settings.zarr_write_format == 3 else 3,
1019+
)
1020+
ad.io.write_elem(g, "/", adata)
1021+
adata_roundtripped = ad.read_zarr(g)
1022+
assert_equal(adata_roundtripped, adata)

0 commit comments

Comments
 (0)