fix: allow writing to v3/2 store when setting is v2/3 if possible #2051
Merged
Conversation
…to ig/zarr_group_version
Codecov Report ✅ All modified and coverable lines are covered by tests. Additional details and impacted files @@ Coverage Diff @@
## main #2051 +/- ##
==========================================
- Coverage 87.60% 85.56% -2.05%
==========================================
Files 46 46
Lines 7052 7052
==========================================
- Hits 6178 6034 -144
- Misses 874 1018 +144
|
flying-sheep
requested changes
Jul 28, 2025
…to ig/zarr_group_version
flying-sheep
approved these changes
Jul 28, 2025
meeseeksmachine
pushed a commit
to meeseeksmachine/anndata
that referenced
this pull request
Jul 29, 2025
…ng is v2/3 if possible
flying-sheep
pushed a commit
that referenced
this pull request
Jul 29, 2025
…2/3 if possible (#2061) Co-authored-by: Ilan Gold <ilanbassgold@gmail.com>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
As of the `main` branch, the following will produce a (to most people) strange error:
Details
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[6], line 5 3 g = zarr.open("foo.zarr") 4 assert ad.settings.zarr_write_format == 2 ----> 5 ad.io.write_elem(g, "/", adata) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:521, in write_elem(store, k, elem, dataset_kwargs) 497 def write_elem( 498 store: GroupStorageType, 499 k: str, (...) 502 dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), 503 ) -> None: 504 """ 505 Write an element to a storage group using anndata encoding. 506 (...) 519 E.g. for zarr this would be `chunks`, `compressor`. 520 """ --> 521 Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs) File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs) 246 raise ValueError(msg) 247 try: --> 248 return func(*args, **kwargs) 249 except Exception as e: 250 path = _get_display_path(store) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:392, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers) 389 write_func = self.find_write_func(dest_type, elem, modifiers) 391 if self.callback is None: --> 392 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) 393 return self.callback( 394 write_func, 395 store, (...) 
399 iospec=self.registry.get_spec(elem), 400 ) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:76, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs) 74 @wraps(func) 75 def wrapper(g: GroupStorageType, k: str, *args, **kwargs): ---> 76 result = func(g, k, *args, **kwargs) 77 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 78 g[k].attrs.setdefault("encoding-version", spec.encoding_version) File ~/Projects/Theis/anndata/src/anndata/_io/specs/methods.py:279, in write_anndata(f, k, adata, _writer, dataset_kwargs) 277 g = f.require_group(k) 278 _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs) --> 279 _writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs) 280 _writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs) 281 _writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs) File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs) 246 raise ValueError(msg) 247 try: --> 248 return func(*args, **kwargs) 249 except Exception as e: 250 path = _get_display_path(store) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:392, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers) 389 write_func = self.find_write_func(dest_type, elem, modifiers) 391 if self.callback is None: --> 392 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) 393 return self.callback( 394 write_func, 395 store, (...) 
399 iospec=self.registry.get_spec(elem), 400 ) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:76, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs) 74 @wraps(func) 75 def wrapper(g: GroupStorageType, k: str, *args, **kwargs): ---> 76 result = func(g, k, *args, **kwargs) 77 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 78 g[k].attrs.setdefault("encoding-version", spec.encoding_version) File ~/Projects/Theis/anndata/src/anndata/_io/specs/methods.py:1000, in write_dataframe(f, key, df, _writer, dataset_kwargs) 995 group.attrs["_index"] = check_key(index_name) 997 # ._values is "the best" array representation. It's the true array backing the 998 # object, where `.values` is always a np.ndarray and .array is always a pandas 999 # array. -> 1000 _writer.write_elem( 1001 group, index_name, df.index._values, dataset_kwargs=dataset_kwargs 1002 ) 1003 for colname, series in df.items(): 1004 # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending) 1005 _writer.write_elem( 1006 group, colname, series._values, dataset_kwargs=dataset_kwargs 1007 ) File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs) 246 raise ValueError(msg) 247 try: --> 248 return func(*args, **kwargs) 249 except Exception as e: 250 path = _get_display_path(store) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:392, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers) 389 write_func = self.find_write_func(dest_type, elem, modifiers) 391 if self.callback is None: --> 392 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) 393 return self.callback( 394 write_func, 395 store, (...) 
399 iospec=self.registry.get_spec(elem), 400 ) File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:76, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs) 74 @wraps(func) 75 def wrapper(g: GroupStorageType, k: str, *args, **kwargs): ---> 76 result = func(g, k, *args, **kwargs) 77 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 78 g[k].attrs.setdefault("encoding-version", spec.encoding_version) File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:308, in zero_dim_array_as_scalar.<locals>.func_wrapper(f, k, elem, _writer, dataset_kwargs) 306 _writer.write_elem(f, k, elem[()], dataset_kwargs=dataset_kwargs) 307 else: --> 308 func(f, k, elem, _writer=_writer, dataset_kwargs=dataset_kwargs) File ~/Projects/Theis/anndata/src/anndata/_io/specs/methods.py:634, in write_vlen_string_array_zarr(f, k, elem, _writer, dataset_kwargs) 632 if ad.settings.zarr_write_format == 2: 633 filters, fill_value = [VLenUTF8()], "" --> 634 f.create_array( 635 k, 636 shape=elem.shape, 637 dtype=dtype, 638 filters=filters, 639 fill_value=fill_value, 640 **dataset_kwargs, 641 ) 642 f[k][:] = elem File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/group.py:2545, in Group.create_array(self, name, shape, dtype, data, chunks, shards, filters, compressors, compressor, serializer, fill_value, order, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config, write_data) 2442 """Create an array within this group. 2443 2444 This method lightly wraps :func:`zarr.core.array.create_array`. (...) 
2539 AsyncArray 2540 """ 2541 compressors = _parse_deprecated_compressor( 2542 compressor, compressors, zarr_format=self.metadata.zarr_format 2543 ) 2544 return Array( -> 2545 self._sync( 2546 self._async_group.create_array( 2547 name=name, 2548 shape=shape, 2549 dtype=dtype, 2550 data=data, 2551 chunks=chunks, 2552 shards=shards, 2553 fill_value=fill_value, 2554 attributes=attributes, 2555 chunk_key_encoding=chunk_key_encoding, 2556 compressors=compressors, 2557 serializer=serializer, 2558 dimension_names=dimension_names, 2559 order=order, 2560 filters=filters, 2561 overwrite=overwrite, 2562 storage_options=storage_options, 2563 config=config, 2564 write_data=write_data, 2565 ) 2566 ) 2567 ) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/sync.py:208, in SyncMixin._sync(self, coroutine) 205 def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: 206 # TODO: refactor this to to take *args and **kwargs and pass those to the method 207 # this should allow us to better type the sync wrapper --> 208 return sync( 209 coroutine, 210 timeout=config.get("async.timeout"), 211 ) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/sync.py:163, in sync(coro, loop, timeout) 160 return_result = next(iter(finished)).result() 162 if isinstance(return_result, BaseException): --> 163 raise return_result 164 else: 165 return return_result File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/sync.py:119, in _runner(coro) 114 """ 115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an 116 exception, the exception will be returned. 
117 """ 118 try: --> 119 return await coro 120 except Exception as ex: 121 return ex File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/group.py:1122, in AsyncGroup.create_array(self, name, shape, dtype, data, chunks, shards, filters, compressors, compressor, serializer, fill_value, order, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config, write_data) 1022 """Create an array within this group. 1023 1024 This method lightly wraps :func:`zarr.core.array.create_array`. (...) 1117 1118 """ 1119 compressors = _parse_deprecated_compressor( 1120 compressor, compressors, zarr_format=self.metadata.zarr_format 1121 ) -> 1122 return await create_array( 1123 store=self.store_path, 1124 name=name, 1125 shape=shape, 1126 dtype=dtype, 1127 data=data, 1128 chunks=chunks, 1129 shards=shards, 1130 filters=filters, 1131 compressors=compressors, 1132 serializer=serializer, 1133 fill_value=fill_value, 1134 order=order, 1135 zarr_format=self.metadata.zarr_format, 1136 attributes=attributes, 1137 chunk_key_encoding=chunk_key_encoding, 1138 dimension_names=dimension_names, 1139 storage_options=storage_options, 1140 overwrite=overwrite, 1141 config=config, 1142 write_data=write_data, 1143 ) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:4515, in create_array(store, name, shape, dtype, data, chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config, write_data) 4510 mode: Literal["a"] = "a" 4512 store_path = await make_store_path( 4513 store, path=name, mode=mode, storage_options=storage_options 4514 ) -> 4515 return await init_array( 4516 store_path=store_path, 4517 shape=shape_parsed, 4518 dtype=dtype_parsed, 4519 chunks=chunks, 4520 shards=shards, 4521 filters=filters, 4522 compressors=compressors, 4523 serializer=serializer, 4524 fill_value=fill_value, 4525 order=order, 4526 
zarr_format=zarr_format, 4527 attributes=attributes, 4528 chunk_key_encoding=chunk_key_encoding, 4529 dimension_names=dimension_names, 4530 overwrite=overwrite, 4531 config=config, 4532 ) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:4301, in init_array(store_path, shape, dtype, chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, attributes, chunk_key_encoding, dimension_names, overwrite, config) 4289 meta = AsyncArray._create_metadata_v2( 4290 shape=shape_parsed, 4291 dtype=zdtype, (...) 4298 attributes=attributes, 4299 ) 4300 else: -> 4301 array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3( 4302 compressors=compressors, 4303 filters=filters, 4304 serializer=serializer, 4305 dtype=zdtype, 4306 ) 4307 sub_codecs = cast("tuple[Codec, ...]", (*array_array, array_bytes, *bytes_bytes)) 4308 codecs_out: tuple[Codec, ...] File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:4828, in _parse_chunk_encoding_v3(compressors, filters, serializer, dtype) 4826 else: 4827 maybe_array_array = cast("Iterable[Codec | dict[str, JSON]]", filters) -> 4828 out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array) 4830 if serializer == "auto": 4831 out_array_bytes = default_serializer_v3(dtype) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:4828, in <genexpr>(.0) 4826 else: 4827 maybe_array_array = cast("Iterable[Codec | dict[str, JSON]]", filters) -> 4828 out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array) 4830 if serializer == "auto": 4831 out_array_bytes = default_serializer_v3(dtype) File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/registry.py:235, in _parse_array_array_codec(data) 233 else: 234 if not isinstance(data, ArrayArrayCodec): --> 235 raise TypeError(f"Expected a ArrayArrayCodec. 
Got {type(data)} instead.") 236 result = data 237 return result TypeError: Expected a ArrayArrayCodec. Got <class 'numcodecs.vlen.VLenUTF8'> instead. Error raised while writing key '_index' of <class 'zarr.core.group.Group'> to /obs