Skip to content

Commit 3bac10c

Browse files
authored
fix: empty Dataset2D warning + columns setting implementation (#2307)
1 parent 96a1529 commit 3bac10c

6 files changed

Lines changed: 44 additions & 3 deletions

File tree

benchmarks/benchmarks/dataset2d.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ def setup_cache(self):
4040
if writing_string_array_on_disk := (
4141
isinstance(v, np.ndarray) and df["a"].dtype == "string"
4242
):
43-
df["a"] = df["a"].to_numpy()
43+
with pd.option_context("future.infer_string", False): # noqa: FBT003
44+
df["a"] = df["a"].to_numpy()
4445
with ad.settings.override(allow_write_nullable_strings=True):
4546
ad.io.write_elem(store, "df", df)
4647
if writing_string_array_on_disk:

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
nitpick_ignore = [ # APIs without an intersphinx entry
9292
# These APIs aren’t actually documented
9393
("py:class", "anndata._core.raw.Raw"),
94-
("py:class", "pandas._libs.missing.NAType"),
94+
("py:class", "pandas.api.typing.NAType"),
9595
# TODO: remove zappy support; the zappy repo is archived
9696
("py:class", "anndata.compat.ZappyArray"),
9797
]

docs/release-notes/0.12.0.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
- {guilabel}`rc1` {func}`anndata.register_anndata_namespace` functionality for adding custom functionality to an {class}`~anndata.AnnData` object {user}`srivarra` ({pr}`1870`)
4040
- {guilabel}`rc2` Allow xarray Datasets to be used for obs/var/obsm/varm. {user}`ilia-kats` ({pr}`1966`)
4141
- {guilabel}`rc4` {class}`anndata.experimental.backed.Dataset2D` now takes a compositional approach to wrapping {class}`xarray.Dataset` which may have breaking changes over the past release versions. {user}`ilan-gold` ({pr}`1997`)
42-
- {guilabel}`rc4` Use {attr}`numpy.dtypes.StringDType` with `na_object` set to {attr}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2011`)
42+
- {guilabel}`rc4` Use {attr}`numpy.dtypes.StringDType` with `na_object` set to {data}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2011`)
4343

4444
#### Performance
4545

src/anndata/_core/xarray.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import numpy as np
99
import pandas as pd
1010

11+
from anndata._warnings import warn
12+
1113
from ..compat import XDataArray, XDataset, XVariable, pandas_as_str
1214

1315
if TYPE_CHECKING:
@@ -282,6 +284,16 @@ def columns(self) -> pd.Index:
282284
columns.discard(index_key)
283285
return pd.Index(columns)
284286

287+
@columns.setter
288+
def columns(self, val) -> None:
289+
if len(self.columns.symmetric_difference(val)) > 0:
290+
msg = "Trying to rename the keys of the mapping with new names - please use a different API to rename the keys of the underlying dataset mapping."
291+
raise ValueError(msg)
292+
warn(
293+
"Renaming or reordering columns on `Dataset2D` has no effect because the underlying data structure has no apparent ordering on its keys",
294+
UserWarning,
295+
)
296+
285297
def __setitem__(
286298
self, key: Hashable | Iterable[Hashable] | Mapping, value: Any
287299
) -> None:

tests/lazy/test_read.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,18 @@ def test_unconsolidated(tmp_path: Path, mtx_format):
198198
store.assert_access_count("obs/.zgroup", 1)
199199

200200

201+
@pytest.mark.zarr_io
202+
def test_empty_df_warns(tmp_path: Path):
203+
adata = AnnData(X=np.ones((10, 10)))
204+
zarr_path = tmp_path / "orig.zarr"
205+
adata.write_zarr(zarr_path)
206+
with pytest.warns(
207+
UserWarning,
208+
match=r"Renaming or reordering columns on `Dataset2D` has no effect",
209+
):
210+
adata.obs = read_elem_lazy(zarr.open(zarr_path)["obs"])
211+
212+
201213
def test_h5_file_obj(tmp_path: Path):
202214
adata = gen_adata((10, 10), **GEN_ADATA_NO_XARRAY_ARGS)
203215
orig_pth = tmp_path / "adata.h5ad"

tests/test_xarray.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,22 @@ def test_columns(df, dataset2d):
3232
assert np.all(dataset2d.columns.sort_values() == df.columns.sort_values())
3333

3434

35+
@pytest.mark.parametrize("same_columns", [True, False], ids=["same", "different"])
36+
def test_columns_setter(df, dataset2d: Dataset2D, *, same_columns: bool):
37+
dataset2d_orig = dataset2d.copy()
38+
with (
39+
pytest.warns(
40+
UserWarning, match=r"Renaming or reordering columns on `Dataset2D`"
41+
)
42+
if same_columns
43+
else pytest.raises(ValueError, match=r"Trying to rename the keys")
44+
):
45+
dataset2d.columns = (
46+
dataset2d.columns if same_columns else pd.Index(["not", "a", "column"])
47+
)
48+
assert dataset2d.equals(dataset2d_orig)
49+
50+
3551
def test_to_memory(df, dataset2d):
3652
memory_df = dataset2d.to_memory()
3753
assert np.all(df == memory_df)

0 commit comments

Comments
 (0)