fix: empty Dataset2D warning + columns setting implementation (#2307)

ilan-gold · web-flow · commit 3bac10ce3fcb · 2026-01-23T11:20:11.000Z
diff --git a/benchmarks/benchmarks/dataset2d.py b/benchmarks/benchmarks/dataset2d.py
@@ -40,7 +40,8 @@ def setup_cache(self):
                 if writing_string_array_on_disk := (
                     isinstance(v, np.ndarray) and df["a"].dtype == "string"
                 ):
-                    df["a"] = df["a"].to_numpy()
+                    with pd.option_context("future.infer_string", False):  # noqa: FBT003
+                        df["a"] = df["a"].to_numpy()
                 with ad.settings.override(allow_write_nullable_strings=True):
                     ad.io.write_elem(store, "df", df)
                 if writing_string_array_on_disk:
diff --git a/docs/conf.py b/docs/conf.py
@@ -91,7 +91,7 @@
 nitpick_ignore = [  # APIs without an intersphinx entry
     # These APIs aren’t actually documented
     ("py:class", "anndata._core.raw.Raw"),
-    ("py:class", "pandas._libs.missing.NAType"),
+    ("py:class", "pandas.api.typing.NAType"),
     # TODO: remove zappy support; the zappy repo is archived
     ("py:class", "anndata.compat.ZappyArray"),
 ]
diff --git a/docs/release-notes/0.12.0.md b/docs/release-notes/0.12.0.md
@@ -39,7 +39,7 @@
 - {guilabel}`rc1` {func}`anndata.register_anndata_namespace` functionality for adding custom functionality to an {class}`~anndata.AnnData` object {user}`srivarra` ({pr}`1870`)
 - {guilabel}`rc2` Allow xarray Datasets to be used for obs/var/obsm/varm. {user}`ilia-kats` ({pr}`1966`)
 - {guilabel}`rc4` {class}`anndata.experimental.backed.Dataset2D` now takes a compositional approach to wrapping {class}`xarray.Dataset` which may have breaking changes over the past release versions. {user}`ilan-gold` ({pr}`1997`)
-- {guilabel}`rc4` Use {attr}`numpy.dtypes.StringDType` with `na_object` set to {attr}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2011`)
+- {guilabel}`rc4` Use {attr}`numpy.dtypes.StringDType` with `na_object` set to {data}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold` ({pr}`2011`)
 
 #### Performance
 
diff --git a/src/anndata/_core/xarray.py b/src/anndata/_core/xarray.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pandas as pd
 
+from anndata._warnings import warn
+
 from ..compat import XDataArray, XDataset, XVariable, pandas_as_str
 
 if TYPE_CHECKING:
@@ -282,6 +284,16 @@ def columns(self) -> pd.Index:
             columns.discard(index_key)
         return pd.Index(columns)
 
+    @columns.setter
+    def columns(self, val) -> None:
+        if len(self.columns.symmetric_difference(val)) > 0:
+            msg = "Trying to rename the keys of the mapping with new names - please use a different API to rename the keys of the underlying dataset mapping."
+            raise ValueError(msg)
+        warn(
+            "Renaming or reordering columns on `Dataset2D` has no effect because the underlying data structure has no apparent ordering on its keys",
+            UserWarning,
+        )
+
     def __setitem__(
         self, key: Hashable | Iterable[Hashable] | Mapping, value: Any
     ) -> None:
diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py
@@ -198,6 +198,18 @@ def test_unconsolidated(tmp_path: Path, mtx_format):
     store.assert_access_count("obs/.zgroup", 1)
 
 
+@pytest.mark.zarr_io
+def test_empty_df_warns(tmp_path: Path):
+    adata = AnnData(X=np.ones((10, 10)))
+    zarr_path = tmp_path / "orig.zarr"
+    adata.write_zarr(zarr_path)
+    with pytest.warns(
+        UserWarning,
+        match=r"Renaming or reordering columns on `Dataset2D` has no effect",
+    ):
+        adata.obs = read_elem_lazy(zarr.open(zarr_path)["obs"])
+
+
 def test_h5_file_obj(tmp_path: Path):
     adata = gen_adata((10, 10), **GEN_ADATA_NO_XARRAY_ARGS)
     orig_pth = tmp_path / "adata.h5ad"
diff --git a/tests/test_xarray.py b/tests/test_xarray.py
@@ -32,6 +32,22 @@ def test_columns(df, dataset2d):
     assert np.all(dataset2d.columns.sort_values() == df.columns.sort_values())
 
 
+@pytest.mark.parametrize("same_columns", [True, False], ids=["same", "different"])
+def test_columns_setter(df, dataset2d: Dataset2D, *, same_columns: bool):
+    dataset2d_orig = dataset2d.copy()
+    with (
+        pytest.warns(
+            UserWarning, match=r"Renaming or reordering columns on `Dataset2D`"
+        )
+        if same_columns
+        else pytest.raises(ValueError, match=r"Trying to rename the keys")
+    ):
+        dataset2d.columns = (
+            dataset2d.columns if same_columns else pd.Index(["not", "a", "column"])
+        )
+    assert dataset2d.equals(dataset2d_orig)
+
+
 def test_to_memory(df, dataset2d):
     memory_df = dataset2d.to_memory()
     assert np.all(df == memory_df)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -91,7 +91,7 @@` |
| `91` | `91` | `nitpick_ignore = [ # APIs without an intersphinx entry` |
| `92` | `92` | `# These APIs aren’t actually documented` |
| `93` | `93` | `("py:class", "anndata._core.raw.Raw"),` |
| `94` |  | `- ("py:class", "pandas._libs.missing.NAType"),` |
|  | `94` | `+ ("py:class", "pandas.api.typing.NAType"),` |
| `95` | `95` | `# TODO: remove zappy support; the zappy repo is archived` |
| `96` | `96` | `("py:class", "anndata.compat.ZappyArray"),` |
| `97` | `97` | `]` |