From 2bac6cf7435731c527fdb82f06a50c443149b322 Mon Sep 17 00:00:00 2001
From: ilan-gold
Date: Tue, 29 Jul 2025 11:55:23 +0200
Subject: [PATCH 1/3] fix: allow categorical index in `read_lazy`

---
 src/anndata/_io/specs/lazy_methods.py | 10 +++++++++-
 tests/lazy/test_read.py               | 11 +++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py
index b6ad2e591..d10116252 100644
--- a/src/anndata/_io/specs/lazy_methods.py
+++ b/src/anndata/_io/specs/lazy_methods.py
@@ -304,7 +304,15 @@ def read_dataframe(
     if not use_range_index:
         dim_name = elem.attrs["_index"]
         # no sense in reading this in multiple times
-        index = elem_dict[dim_name].compute()
+        if isinstance(elem_dict[dim_name], DaskArray):
+            index = elem_dict[dim_name].compute()
+        else:
+            import xarray
+
+            index = elem_dict[dim_name][
+                xarray.core.indexing.BasicIndexer((slice(None),))
+            ].array
+        index = pd.Index(index)
     else:
         dim_name = DUMMY_RANGE_INDEX_KEY
         index = pd.RangeIndex(len(elem_dict[elem.attrs["_index"]])).astype("str")
diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py
index 9624668d6..c4aa4f113 100644
--- a/tests/lazy/test_read.py
+++ b/tests/lazy/test_read.py
@@ -211,3 +211,14 @@ def test_chunks_df(
     for k in ds:
         if isinstance(arr := ds[k].data, DaskArray):
             assert arr.chunksize == expected_chunks
+
+
+@pytest.mark.zarr_io
+def test_categorical_idx(tmp_path: Path):
+    adata = AnnData(shape=[10, 20])
+    adata.obs.index = adata.obs.index.astype("category")
+    orig_pth = tmp_path / "categorical_id.zarr"
+    adata.write_zarr(orig_pth)
+    remote = read_lazy(orig_pth)
+    remote_to_memory = remote.to_memory()
+    assert_equal(adata, remote_to_memory)

From 4a1c49e68b7025d65fabbb8b365409aa5648aaef Mon Sep 17 00:00:00 2001
From: ilan-gold
Date: Tue, 29 Jul 2025 12:16:45 +0200
Subject: [PATCH 2/3] chore: add warning

---
 src/anndata/_io/specs/lazy_methods.py | 3 +++
 tests/lazy/test_read.py               | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py
index d10116252..bc51ee004 100644
--- a/src/anndata/_io/specs/lazy_methods.py
+++ b/src/anndata/_io/specs/lazy_methods.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import warnings
 from contextlib import contextmanager
 from functools import partial, singledispatch
 from pathlib import Path
@@ -312,6 +313,8 @@ def read_dataframe(
             index = elem_dict[dim_name][
                 xarray.core.indexing.BasicIndexer((slice(None),))
             ].array
+            msg = f"Found non-string indices {index} in on-disk {elem} store"
+            warnings.warn(msg, stacklevel=2)
         index = pd.Index(index)
     else:
         dim_name = DUMMY_RANGE_INDEX_KEY
diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py
index c4aa4f113..81d8c9780 100644
--- a/tests/lazy/test_read.py
+++ b/tests/lazy/test_read.py
@@ -219,6 +219,7 @@ def test_categorical_idx(tmp_path: Path):
     adata.obs.index = adata.obs.index.astype("category")
     orig_pth = tmp_path / "categorical_id.zarr"
     adata.write_zarr(orig_pth)
-    remote = read_lazy(orig_pth)
+    with pytest.warns(UserWarning, match=r"non-string indices"):
+        remote = read_lazy(orig_pth)
     remote_to_memory = remote.to_memory()
     assert_equal(adata, remote_to_memory)

From 2d77e899430d5409c4ce555607a9506634f99e54 Mon Sep 17 00:00:00 2001
From: ilan-gold
Date: Tue, 29 Jul 2025 12:18:40 +0200
Subject: [PATCH 3/3] chore: relnote

---
 docs/release-notes/2062.bugfix.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/release-notes/2062.bugfix.md

diff --git a/docs/release-notes/2062.bugfix.md b/docs/release-notes/2062.bugfix.md
new file mode 100644
index 000000000..d72a295b5
--- /dev/null
+++ b/docs/release-notes/2062.bugfix.md
@@ -0,0 +1 @@
+Allow reading of non-string index in {func}`anndata.experimental.read_lazy` and {func}`anndata.experimental.read_elem_lazy` for {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold`