Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/2054.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Revert accidental change where {attr}`~anndata.AnnData.X` got written to disk when it was `None` {user}`flying-sheep`
69 changes: 47 additions & 22 deletions src/anndata/_io/h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from functools import partial
from pathlib import Path
from types import MappingProxyType
from typing import TYPE_CHECKING, TypeVar
from typing import TYPE_CHECKING, TypeVar, cast
from warnings import warn

import h5py
Expand Down Expand Up @@ -36,11 +36,12 @@
)

if TYPE_CHECKING:
from collections.abc import Callable, Collection, Mapping, Sequence
from collections.abc import Callable, Collection, Container, Mapping, Sequence
from os import PathLike
from typing import Any, Literal

from .._core.file_backing import AnnDataFileManager
from .._core.raw import Raw

T = TypeVar("T")

Expand Down Expand Up @@ -82,29 +83,18 @@ def write_h5ad(
# TODO: Use spec writing system for this
# Currently can't use write_dispatched here because this function is also called to do an
# inplace update of a backed object, which would delete "/"
f = f["/"]
f = cast("h5py.Group", f["/"])
f.attrs.setdefault("encoding-type", "anndata")
f.attrs.setdefault("encoding-version", "0.1.0")

if "X" in as_dense and isinstance(
adata.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
elif not (adata.isbacked and Path(adata.filename) == Path(filepath)):
# If adata.isbacked, X should already be up to date
write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
if "raw/X" in as_dense and isinstance(
adata.raw.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(
f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs
)
write_elem(f, "raw/var", adata.raw.var, dataset_kwargs=dataset_kwargs)
write_elem(
f, "raw/varm", dict(adata.raw.varm), dataset_kwargs=dataset_kwargs
)
elif adata.raw is not None:
write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
_write_x(
f,
adata, # accessing adata.X reopens adata.file if it’s backed
is_backed=adata.isbacked and adata.filename == filepath,
as_dense=as_dense,
dataset_kwargs=dataset_kwargs,
)
_write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs)
write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
Expand All @@ -115,6 +105,41 @@ def write_h5ad(
write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)


def _write_x(
f: h5py.Group,
adata: AnnData,
*,
is_backed: bool,
as_dense: Container[str],
dataset_kwargs: Mapping[str, Any],
) -> None:
if "X" in as_dense and isinstance(adata.X, CSMatrix | BaseCompressedSparseDataset):
write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
elif is_backed:
pass # If adata.isbacked, X should already be up to date
elif adata.X is None:
f.pop("X", None)
Copy link
Copy Markdown
Member Author

@flying-sheep flying-sheep Jul 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding this pop branch make Ruff complain about too high complexity, so I extracted _write_x and _write_raw.

The logic should otherwise be the same for raw, and almost the same for X:

  1. check if X is to be written sparse-to-dense, if yes, do it, else
  2. check if X is backed and the file path matches, if yes we’re done with X, else
  3. (new) if X is None, remove "X" from the HDF5 group if it’s there, else
  4. write X normally to the file

else:
write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)


def _write_raw(
f: h5py.Group,
raw: Raw,
*,
as_dense: Container[str],
dataset_kwargs: Mapping[str, Any],
) -> None:
if "raw/X" in as_dense and isinstance(
raw.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(f, "raw/X", raw.X, dataset_kwargs=dataset_kwargs)
write_elem(f, "raw/var", raw.var, dataset_kwargs=dataset_kwargs)
write_elem(f, "raw/varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
elif raw is not None:
write_elem(f, "raw", raw, dataset_kwargs=dataset_kwargs)


@report_write_key_on_error
@write_spec(IOSpec("array", "0.2.0"))
def write_sparse_as_dense(
Expand Down
3 changes: 2 additions & 1 deletion src/anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ def write_anndata(
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
):
g = f.require_group(k)
_writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
if adata.X is not None:
_writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
Expand Down
6 changes: 0 additions & 6 deletions src/anndata/_io/zarr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, TypeVar
from warnings import warn

Expand Down Expand Up @@ -37,8 +36,6 @@ def write_zarr(
**ds_kwargs,
) -> None:
"""See :meth:`~anndata.AnnData.write_zarr`."""
if isinstance(store, Path):
store = str(store)
if convert_strings_to_categoricals:
adata.strings_to_categoricals()
if adata.raw is not None:
Expand Down Expand Up @@ -75,9 +72,6 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnDa
store
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
"""
if isinstance(store, Path):
store = str(store)

f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r")

# Read with handling for backwards compat
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@

if TYPE_CHECKING:
from collections.abc import Generator
from pathlib import Path
from types import EllipsisType


@pytest.fixture
def backing_h5ad(tmp_path):
def backing_h5ad(tmp_path: Path) -> Path:
return tmp_path / "test.h5ad"


Expand Down
Binary file added tests/data/archives/v0.11.4/adata.h5ad
Binary file not shown.
Binary file added tests/data/archives/v0.11.4/adata.zarr.zip
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/data/archives/v0.11.4/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
These files were written with

```bash
uvx '--with=anndata==0.11.4' '--with=zarr<3' python -c '
import zarr
from anndata import AnnData
adata = AnnData(shape=(10, 20))
adata.write_zarr(zarr.ZipStore("tests/data/archives/v0.11.4/adata.zarr.zip"))
adata.write_h5ad("tests/data/archives/v0.11.4/adata.h5ad")'
```
2 changes: 1 addition & 1 deletion tests/test_backed_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense):

# this is very similar to the views test
@pytest.mark.filterwarnings("ignore::anndata.ImplicitModificationWarning")
def test_backing(adata, tmp_path, backing_h5ad):
def test_backing(adata: ad.AnnData, tmp_path: Path, backing_h5ad: Path) -> None:
assert not adata.isbacked

adata.filename = backing_h5ad
Expand Down
39 changes: 31 additions & 8 deletions tests/test_io_backwards_compat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from pathlib import Path
from subprocess import run
from typing import TYPE_CHECKING

import pandas as pd
import pytest
Expand All @@ -12,25 +14,46 @@
from anndata.compat import is_zarr_v2
from anndata.tests.helpers import assert_equal

if TYPE_CHECKING:
from typing import Literal

ARCHIVE_PTH = Path(__file__).parent / "data/archives"


@pytest.fixture(params=list(ARCHIVE_PTH.glob("v*")), ids=lambda x: x.name)
def archive_dir(request):
def archive_dir(request: pytest.FixtureRequest) -> Path:
return request.param


def test_backwards_compat_files(archive_dir) -> None:
with pytest.warns(ad.OldFormatWarning):
from_h5ad = ad.read_h5ad(archive_dir / "adata.h5ad")
path = archive_dir / "adata.zarr.zip"
store = path if is_zarr_v2() else zarr.storage.ZipStore(path)
with pytest.warns(ad.OldFormatWarning):
from_zarr = ad.read_zarr(store)
def read_archive(
archive_dir: Path, format: Literal["h5ad", "zarr"]
) -> tuple[ad.AnnData, Path]:
if format == "h5ad":
path = archive_dir / "adata.h5ad"
return ad.read_h5ad(path), path
if format == "zarr":
path = archive_dir / "adata.zarr.zip"
store = path if is_zarr_v2() else zarr.storage.ZipStore(path)
return ad.read_zarr(store), path
pytest.fail(f"Unknown format: {format}")


@pytest.mark.filterwarnings("ignore::anndata.OldFormatWarning")
def test_backwards_compat_files(archive_dir: Path) -> None:
from_h5ad, _ = read_archive(archive_dir, "h5ad")
from_zarr, _ = read_archive(archive_dir, "zarr")
assert_equal(from_h5ad, from_zarr, exact=True)


def test_no_diff(tmp_path: Path, archive_dir: Path) -> None:
if archive_dir.name in {"v0.7.8", "v0.7.0"}:
pytest.skip("DataFrame encoding changed between 0.7 and now")
adata, in_path = read_archive(archive_dir, "h5ad")
adata.write_h5ad(out_path := tmp_path / "adata.h5ad")
diff_proc = run(["h5diff", in_path, out_path], check=False)
assert diff_proc.returncode == 0


def test_clean_uns_backwards_compat(tmp_path, diskfmt):
pth = tmp_path / f"test_write.{diskfmt}"
write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
Expand Down
30 changes: 26 additions & 4 deletions tests/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
import warnings
from contextlib import contextmanager
from contextlib import contextmanager, nullcontext
from functools import partial
from importlib.util import find_spec
from pathlib import Path
Expand Down Expand Up @@ -38,6 +38,7 @@
)

if TYPE_CHECKING:
from collections.abc import Generator
from typing import Literal

HERE = Path(__file__).parent
Expand Down Expand Up @@ -100,6 +101,15 @@ def rw(backing_h5ad):
return curr, orig


@contextmanager
def open_store(
path: Path, diskfmt: Literal["h5ad", "zarr"]
) -> Generator[h5py.File | zarr.Group, None, None]:
f = zarr.open_group(path) if diskfmt == "zarr" else h5py.File(path, "r")
with f if isinstance(f, h5py.File) else nullcontext():
yield f


@pytest.fixture(params=[np.uint8, np.int32, np.int64, np.float32, np.float64])
def dtype(request):
return request.param
Expand Down Expand Up @@ -403,10 +413,10 @@ def check_compressed(value, key):
not_compressed.append(key)

if is_zarr_v2():
with zarr.open(str(pth), "r") as f:
with zarr.open(pth, "r") as f:
f.visititems(check_compressed)
else:
f = zarr.open(str(pth), mode="r")
f = zarr.open(pth, mode="r")
for key, value in f.members(max_depth=None):
check_compressed(value, key)

Expand Down Expand Up @@ -765,12 +775,24 @@ def test_zarr_chunk_X(tmp_path):
adata = gen_adata((100, 100), X_type=np.array, **GEN_ADATA_NO_XARRAY_ARGS)
adata.write_zarr(zarr_pth, chunks=(10, 10))

z = zarr.open(str(zarr_pth)) # As of v2.3.2 zarr won’t take a Path
z = zarr.open(zarr_pth)
assert z["X"].chunks == (10, 10)
from_zarr = ad.read_zarr(zarr_pth)
assert_equal(from_zarr, adata)


def test_write_x_none(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]) -> None:
adata = ad.AnnData(shape=(10, 10), obs={"a": np.ones(10)}, var={"b": np.ones(10)})
p = tmp_path / f"adata.{diskfmt}"
write = getattr(adata, f"write_{diskfmt}")

write(p)
with open_store(p, diskfmt) as f:
root_keys = list(f.keys())

assert "X" not in root_keys


################################
# Round-tripping scanpy datasets
################################
Expand Down
Loading