From 8b3eb97841a71306ef848864fd61959474655e4c Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Fri, 25 Jul 2025 10:51:01 +0200
Subject: [PATCH 1/5] fix: allow writing to v3/2 store when setting is v2/3 if
 possible

---
 docs/tutorials/zarr-v3.md        |  4 ++--
 src/anndata/_io/specs/methods.py |  4 ++--
 src/anndata/_settings.py         |  2 +-
 tests/test_backed_sparse.py      |  2 +-
 tests/test_io_elementwise.py     | 12 ------------
 tests/test_io_partial.py         |  6 ------
 tests/test_readwrite.py          | 21 ++++++++++++++++-----
 7 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/docs/tutorials/zarr-v3.md b/docs/tutorials/zarr-v3.md
index a5b4668e3..82d2e48d4 100644
--- a/docs/tutorials/zarr-v3.md
+++ b/docs/tutorials/zarr-v3.md
@@ -1,6 +1,6 @@
 # zarr-v3 Guide/Roadmap
 
-`anndata` now uses the much improved {mod}`zarr` v3 package and also allows writing of datasets in the v3 format via {attr}`anndata.settings.zarr_write_format`, with the exception of structured arrays.
+`anndata` now uses the much improved {mod}`zarr` v3 package and also allows writing of datasets in the v3 format, selected via {attr}`anndata.settings.zarr_write_format` when writing with {func}`anndata.io.write_zarr` or {meth}`anndata.AnnData.write_zarr`, with the exception of structured arrays.
 Users should notice a significant performance improvement, especially for cloud data, but also likely for local data as well.
 Here is a quick guide on some of our learnings so far:
 
@@ -48,7 +48,7 @@ import anndata as ad
 from collections.abc import Mapping
 from typing import Any
 
-ad.settings.zarr_write_format = 3 # Absolutely crucial! Sharding is only for the v3 file format!
+g = zarr.open_group(orig_path, mode="a", use_consolidated=False, zarr_format=3) # zarr_format 3 is the default, but note that sharding only works with v3!
 
 def write_sharded(group: zarr.Group, adata: ad.AnnData):
     def callback(
diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py
index 97d1a8640..9fe92dfa2 100644
--- a/src/anndata/_io/specs/methods.py
+++ b/src/anndata/_io/specs/methods.py
@@ -629,7 +629,7 @@ def write_vlen_string_array_zarr(
         dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
         dtype = VariableLengthUTF8()
         filters, fill_value = None, None
-        if ad.settings.zarr_write_format == 2:
+        if f.metadata.zarr_format == 2:
             filters, fill_value = [VLenUTF8()], ""
         f.create_array(
             k,
@@ -1283,7 +1283,7 @@ def write_scalar_zarr(
         from numcodecs import VLenUTF8
         from zarr.core.dtype import VariableLengthUTF8
 
-        match ad.settings.zarr_write_format, value:
+        match f.metadata.zarr_format, value:
             case 2, str():
                 filters, dtype, fill_value = [VLenUTF8()], VariableLengthUTF8(), ""
             case 3, str():
diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py
index 22eb3680a..84ea652b7 100644
--- a/src/anndata/_settings.py
+++ b/src/anndata/_settings.py
@@ -447,7 +447,7 @@ def validate_zarr_write_format(format: int):
 settings.register(
     "zarr_write_format",
     default_value=2,
-    description="Which version of zarr to write to.",
+    description="Which version of zarr to write to when anndata must internally open a write-able zarr group.",
     validate=validate_zarr_write_format,
     get_from_env=lambda name, default: check_and_get_environ_var(
         f"ANNDATA_{name.upper()}",
diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py
index 3acdc4a55..c4f8deffd 100644
--- a/tests/test_backed_sparse.py
+++ b/tests/test_backed_sparse.py
@@ -517,7 +517,7 @@ def test_data_access(
         data,
         store=path / "X" / "data",
         chunks=(1,),
-        zarr_format=ad.settings.zarr_write_format,
+        zarr_format=f.metadata.zarr_format,
     )
     store = AccessTrackingStore(path)
     store.initialize_key_trackers(["X/data"])
diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py
index e03f21842..3bb18e489 100644
--- a/tests/test_io_elementwise.py
+++ b/tests/test_io_elementwise.py
@@ -206,14 +206,6 @@ def create_sparse_store(
     ],
 )
 def test_io_spec(store, value, encoding_type):
-    # zarr v3 can't write recarray
-    # https://github.com/zarr-developers/zarr-python/issues/2134
-    if (
-        ad.settings.zarr_write_format == 3
-        and encoding_type == "anndata"
-        and "O_recarray" in value.uns
-    ):
-        del value.uns["O_recarray"]
     with ad.settings.override(allow_write_nullable_strings=True):
         key = f"key_for_{encoding_type}"
         write_elem(store, key, value, dataset_kwargs={})
@@ -564,10 +556,6 @@ def test_write_to_root(store, value):
     """
     Test that elements which are written as groups can we written to the root group.
     """
-    # zarr v3 can't write recarray
-    # https://github.com/zarr-developers/zarr-python/issues/2134
-    if ad.settings.zarr_write_format == 3 and isinstance(value, ad.AnnData):
-        del value.uns["O_recarray"]
     write_elem(store, "/", value)
     # See: https://github.com/zarr-developers/zarr-python/issues/2716
     if isinstance(store, ZarrGroup) and not is_zarr_v2():
diff --git a/tests/test_io_partial.py b/tests/test_io_partial.py
index 1476d6f4f..7ed96e9fa 100644
--- a/tests/test_io_partial.py
+++ b/tests/test_io_partial.py
@@ -10,7 +10,6 @@
 import zarr
 from scipy.sparse import csr_matrix
 
-import anndata
 from anndata import AnnData
 from anndata._io.specs.registry import read_elem_partial
 from anndata.io import read_elem, write_h5ad, write_zarr
@@ -51,11 +50,6 @@ def test_read_partial_adata(tmp_path, diskfmt):
         import scanpy as sc
 
     adata = sc.datasets.pbmc68k_reduced()
-    # zarr v3 can't write recarray
-    # https://github.com/zarr-developers/zarr-python/issues/2134
-    if anndata.settings.zarr_write_format == 3 and isinstance(adata, AnnData):
-        del adata.uns["rank_genes_groups"]["scores"]
-        del adata.uns["rank_genes_groups"]["names"]
 
     path = Path(tmp_path) / ("test_rp." + diskfmt)
 
diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py
index dd41e994a..5c513866e 100644
--- a/tests/test_readwrite.py
+++ b/tests/test_readwrite.py
@@ -821,11 +821,6 @@ def test_scanpy_pbmc68k(tmp_path, diskfmt, roundtrip, diskfmt2):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", ad.OldFormatWarning)
         pbmc = sc.datasets.pbmc68k_reduced()
-        # zarr v3 can't write recarray
-        # https://github.com/zarr-developers/zarr-python/issues/2134
-        if ad.settings.zarr_write_format == 3:
-            del pbmc.uns["rank_genes_groups"]["names"]
-            del pbmc.uns["rank_genes_groups"]["scores"]
 
     from_disk1 = roundtrip(pbmc, filepth1)  # Do we read okay
     from_disk2 = roundtrip2(from_disk1, filepth2)  # Can we round trip
@@ -985,3 +980,19 @@ def test_write_elem_consolidated(tmp_path: Path):
         ValueError, match="Cannot overwrite/edit a store with consolidated metadata"
     ):
         ad.io.write_elem(g["obs"], "foo", np.arange(10))
+
+
+@pytest.mark.zarr_io
+def test_write_elem_version_mismatch(tmp_path: Path):
+    if is_zarr_v2():
+        pytest.skip("zarr v3 package test")
+    zarr_path = tmp_path / "foo.zarr"
+    adata = ad.AnnData(np.ones((10, 10)))
+    g = zarr.open_group(
+        zarr_path,
+        mode="w",
+        zarr_format=2 if ad.settings.zarr_write_format == 3 else 3,
+    )
+    ad.io.write_elem(g, "/", adata)
+    adata_roundtripped = ad.read_zarr(g)
+    assert_equal(adata_roundtripped, adata)

From 510d8680987a1bba6e009b33c7cdf24db242a5d7 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Fri, 25 Jul 2025 10:57:05 +0200
Subject: [PATCH 2/5] fix: `zarr_format`

---
 tests/test_backed_sparse.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py
index c4f8deffd..2f8381279 100644
--- a/tests/test_backed_sparse.py
+++ b/tests/test_backed_sparse.py
@@ -513,11 +513,14 @@ def test_data_access(
     data = f["X/data"][...]
     del f["X/data"]
     # chunk one at a time to count properly
+    kwargs = {}
+    if not is_zarr_v2():
+        kwargs["zarr_format"] = f.metadata.zarr_format
     zarr.array(
         data,
         store=path / "X" / "data",
         chunks=(1,),
-        zarr_format=f.metadata.zarr_format,
+        **kwargs,
     )
     store = AccessTrackingStore(path)
     store.initialize_key_trackers(["X/data"])

From 1b4840c93bb4ab3d0f8fec1ca73b6924208c1a68 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 28 Jul 2025 12:04:15 +0200
Subject: [PATCH 3/5] fix: small things from pr review

---
 src/anndata/_io/specs/methods.py | 1 -
 tests/test_readwrite.py          | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py
index 9fe92dfa2..a343e352b 100644
--- a/src/anndata/_io/specs/methods.py
+++ b/src/anndata/_io/specs/methods.py
@@ -700,7 +700,6 @@ def write_recarray_zarr(
     else:
         dataset_kwargs = dataset_kwargs.copy()
         dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
-        # TODO: zarr’s on-disk format v3 doesn’t support this dtype
         f.create_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
         f[k][...] = elem
 
diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py
index 5c513866e..910ce1ce2 100644
--- a/tests/test_readwrite.py
+++ b/tests/test_readwrite.py
@@ -983,9 +983,8 @@ def test_write_elem_consolidated(tmp_path: Path):
 
 
 @pytest.mark.zarr_io
+@pytest.mark.skipif(is_zarr_v2(), reason="zarr v3 package test")
 def test_write_elem_version_mismatch(tmp_path: Path):
-    if is_zarr_v2():
-        pytest.skip("zarr v3 package test")
     zarr_path = tmp_path / "foo.zarr"
     adata = ad.AnnData(np.ones((10, 10)))
     g = zarr.open_group(

From ee6296d539a7fbf64a35d6a4d3647d8dc066656b Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Mon, 28 Jul 2025 12:12:29 +0200
Subject: [PATCH 4/5] remove useless check

---
 src/anndata/_io/specs/methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py
index a343e352b..82f455198 100644
--- a/src/anndata/_io/specs/methods.py
+++ b/src/anndata/_io/specs/methods.py
@@ -695,7 +695,7 @@ def write_recarray_zarr(
     from anndata.compat import _to_fixed_length_strings
 
     elem = _to_fixed_length_strings(elem)
-    if isinstance(f, H5Group) or is_zarr_v2():
+    if is_zarr_v2():
         f.create_dataset(k, data=elem, shape=elem.shape, **dataset_kwargs)
     else:
         dataset_kwargs = dataset_kwargs.copy()

From 7dd768926a06587dab8e132ff562e244a4dfba32 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 28 Jul 2025 14:35:03 +0200
Subject: [PATCH 5/5] chore: add comment

---
 src/anndata/compat/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py
index 00e81a80d..6eb4da48b 100644
--- a/src/anndata/compat/__init__.py
+++ b/src/anndata/compat/__init__.py
@@ -286,8 +286,12 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray:
     """\
     Convert variable length strings to fixed length.
 
-    Currently a workaround for
-    https://github.com/zarr-developers/zarr-python/pull/422
+    Formerly a workaround for
+    https://github.com/zarr-developers/zarr-python/pull/422,
+    resolved in https://github.com/zarr-developers/zarr-python/pull/813.
+
+    However, without this conversion we would have to use a special codec in v2
+    for objects, and v3 doesn't support objects at all, so we leave this function as-is.
     """
     new_dtype = []
     for dt_name, (dt_type, dt_offset) in value.dtype.fields.items():