Skip to content

Commit c9d6046

Browse files
authored
fix: lazy index.name not destroyed (#2358)
1 parent fbcc350 commit c9d6046

7 files changed

Lines changed: 20 additions & 13 deletions

File tree

docs/release-notes/2358.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure the index names along `obs` and `var` are not lost when reading/writing to disk after reading with {func}`~anndata.experimental.read_lazy` {user}`ilan-gold`

src/anndata/_core/merge.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,8 @@ def make_xarray_extension_dtypes_dask(
13131313
)
13141314

13151315

1316-
DS_CONCAT_DUMMY_INDEX_NAME = "concat_index"
1316+
DS_CONCAT_DUMMY_INDEX_NAME = "_anndata_concat_index"
1317+
DS_MERGE_DUMMY_INDEX_NAME = "_anndata_merge_index"
13171318

13181319

13191320
def concat_dataset2d_on_annot_axis(
@@ -1725,7 +1726,7 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915
17251726
if a.true_index_dim != a.index_dim:
17261727
a.index = a.true_index
17271728
annotations_with_only_dask = [
1728-
a.ds.rename({a.true_index_dim: "merge_index"})
1729+
a.ds.rename({a.true_index_dim: DS_MERGE_DUMMY_INDEX_NAME})
17291730
for a in annotations_with_only_dask
17301731
]
17311732
alt_annot = Dataset2D(

src/anndata/_core/xarray.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,11 @@ def to_memory(self, *, copy: bool = False) -> pd.DataFrame:
243243
-------
244244
:class:`pandas.DataFrame` with index set accordingly.
245245
"""
246+
from anndata._core.merge import (
247+
DS_CONCAT_DUMMY_INDEX_NAME,
248+
DS_MERGE_DUMMY_INDEX_NAME,
249+
)
250+
246251
index_key = self.ds.attrs.get("indexing_key", None)
247252
all_columns = {*self.columns, *([] if index_key is None else [index_key])}
248253
# https://github.com/pydata/xarray/issues/10419
@@ -258,7 +263,12 @@ def to_memory(self, *, copy: bool = False) -> pd.DataFrame:
258263
)
259264
if df.index.name != index_key and index_key is not None:
260265
df = df.set_index(index_key)
261-
df.index.name = None # matches old AnnData object
266+
if df.index.name in {
267+
"_index",
268+
DS_CONCAT_DUMMY_INDEX_NAME,
269+
DS_MERGE_DUMMY_INDEX_NAME,
270+
}:
271+
df.index.name = None # matches old AnnData object
262272
return df
263273

264274
@property

src/anndata/tests/helpers.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,12 +1193,7 @@ class AccessTrackingStore(LocalStore):
11931193
_accessed_keys: defaultdict[str, list[str]]
11941194

11951195
def __init__(self, *args, **kwargs):
1196-
import traceback
1197-
1198-
traceback.print_stack()
1199-
print(kwargs)
12001196
super().__init__(*args, **kwargs)
1201-
print(self._read_only)
12021197
self._access_count = Counter()
12031198
self._accessed = defaultdict(set)
12041199
self._accessed_keys = defaultdict(list)

tests/lazy/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ def adata_remote_with_store_tall_skinny_path(
124124
orig_path = tmp_path_factory.mktemp(f"orig_{worker_id}.zarr")
125125
M = 1000
126126
N = 5
127-
obs_names = pd.Index(f"cell{i}" for i in range(M))
127+
# One named, one unnamed
128+
obs_names = pd.Index((f"cell{i}" for i in range(M)), name="obs_names")
128129
var_names = pd.Index(f"gene{i}" for i in range(N))
129130
obs = gen_typed_df(M, obs_names)
130131
var = gen_typed_df(N, var_names)

tests/lazy/test_read.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,16 @@ def test_access_count_index(
9494
) -> None:
9595
adata_orig = read_zarr(adata_remote_with_store_tall_skinny_path)
9696

97-
remote_store_tall_skinny.initialize_key_trackers(["obs/_index"])
97+
remote_store_tall_skinny.initialize_key_trackers(["obs/obs_names"])
9898
read_lazy(remote_store_tall_skinny, load_annotation_index=False)
99-
remote_store_tall_skinny.assert_access_count("obs/_index", 0)
99+
remote_store_tall_skinny.assert_access_count("obs/obs_names", 0)
100100

101101
read_lazy(remote_store_tall_skinny)
102102
n_chunks = 4
103103
count_expected = ( # *2 when mask exists
104104
n_chunks * 2 if adata_orig.obs.index.dtype == "string" else n_chunks
105105
)
106-
remote_store_tall_skinny.assert_access_count("obs/_index", count_expected)
106+
remote_store_tall_skinny.assert_access_count("obs/obs_names", count_expected)
107107

108108

109109
def test_access_count_dtype(

tests/test_xarray.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def test_true_index_dim_column_subset(dataset2d, df):
8686
df_expected = dataset2d[cols].to_memory()
8787
# account for the fact that we manually set `true_index_dim`
8888
df.index = df[col]
89-
df.index.name = None
9089
pd.testing.assert_frame_equal(df_expected, df[cols])
9190

9291

0 commit comments

Comments
 (0)