Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ jobs:
fetch-depth: 0
filter: blob:none

- name: Install system dependencies
run: sudo apt install -y hdf5-tools

- name: Set up Python ${{ matrix.env.python }}
uses: actions/setup-python@v5
with:
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/2054.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Revert accidental change where {attr}`~anndata.AnnData.X` got written to disk when it was `None` {user}`flying-sheep`
69 changes: 47 additions & 22 deletions src/anndata/_io/h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from functools import partial
from pathlib import Path
from types import MappingProxyType
from typing import TYPE_CHECKING, TypeVar
from typing import TYPE_CHECKING, TypeVar, cast
from warnings import warn

import h5py
Expand Down Expand Up @@ -36,11 +36,12 @@
)

if TYPE_CHECKING:
from collections.abc import Callable, Collection, Mapping, Sequence
from collections.abc import Callable, Collection, Container, Mapping, Sequence
from os import PathLike
from typing import Any, Literal

from .._core.file_backing import AnnDataFileManager
from .._core.raw import Raw

T = TypeVar("T")

Expand Down Expand Up @@ -82,29 +83,18 @@ def write_h5ad(
# TODO: Use spec writing system for this
# Currently can't use write_dispatched here because this function is also called to do an
# inplace update of a backed object, which would delete "/"
f = f["/"]
f = cast("h5py.Group", f["/"])
f.attrs.setdefault("encoding-type", "anndata")
f.attrs.setdefault("encoding-version", "0.1.0")

if "X" in as_dense and isinstance(
adata.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
elif not (adata.isbacked and Path(adata.filename) == Path(filepath)):
# If adata.isbacked, X should already be up to date
write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
if "raw/X" in as_dense and isinstance(
adata.raw.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(
f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs
)
write_elem(f, "raw/var", adata.raw.var, dataset_kwargs=dataset_kwargs)
write_elem(
f, "raw/varm", dict(adata.raw.varm), dataset_kwargs=dataset_kwargs
)
elif adata.raw is not None:
write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
_write_x(
f,
adata, # accessing adata.X reopens adata.file if it’s backed
is_backed=adata.isbacked and adata.filename == filepath,
as_dense=as_dense,
dataset_kwargs=dataset_kwargs,
)
_write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs)
write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
Expand All @@ -115,6 +105,41 @@ def write_h5ad(
write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)


def _write_x(
f: h5py.Group,
adata: AnnData,
*,
is_backed: bool,
as_dense: Container[str],
dataset_kwargs: Mapping[str, Any],
) -> None:
if "X" in as_dense and isinstance(adata.X, CSMatrix | BaseCompressedSparseDataset):
write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
elif is_backed:
pass # If adata.isbacked, X should already be up to date
elif adata.X is None:
f.pop("X", None)
Copy link
Copy Markdown
Member Author

@flying-sheep flying-sheep Jul 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding this pop branch make Ruff complain about too high complexity, so I extracted _write_x and _write_raw.

The logic should otherwise be the same for raw, and almost the same for X:

  1. check if X is to be written sparse-to-dense, if yes, do it, else
  2. check if X is backed and the file path matches, if yes we’re done with X, else
  3. (new) if X is None, remove "X" from the HDF5 group if it’s there, else
  4. write X normally to the file

else:
write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)


def _write_raw(
f: h5py.Group,
raw: Raw,
*,
as_dense: Container[str],
dataset_kwargs: Mapping[str, Any],
) -> None:
if "raw/X" in as_dense and isinstance(
raw.X, CSMatrix | BaseCompressedSparseDataset
):
write_sparse_as_dense(f, "raw/X", raw.X, dataset_kwargs=dataset_kwargs)
write_elem(f, "raw/var", raw.var, dataset_kwargs=dataset_kwargs)
write_elem(f, "raw/varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
elif raw is not None:
write_elem(f, "raw", raw, dataset_kwargs=dataset_kwargs)


@report_write_key_on_error
@write_spec(IOSpec("array", "0.2.0"))
def write_sparse_as_dense(
Expand Down
3 changes: 2 additions & 1 deletion src/anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ def write_anndata(
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
):
g = f.require_group(k)
_writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
if adata.X is not None:
_writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs)
_writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
Expand Down
6 changes: 0 additions & 6 deletions src/anndata/_io/zarr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, TypeVar
from warnings import warn

Expand Down Expand Up @@ -37,8 +36,6 @@ def write_zarr(
**ds_kwargs,
) -> None:
"""See :meth:`~anndata.AnnData.write_zarr`."""
if isinstance(store, Path):
store = str(store)
if convert_strings_to_categoricals:
adata.strings_to_categoricals()
if adata.raw is not None:
Expand Down Expand Up @@ -75,9 +72,6 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnDa
store
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
"""
if isinstance(store, Path):
store = str(store)

f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r")

# Read with handling for backwards compat
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@

if TYPE_CHECKING:
from collections.abc import Generator
from pathlib import Path
from types import EllipsisType


@pytest.fixture
def backing_h5ad(tmp_path):
def backing_h5ad(tmp_path: Path) -> Path:
return tmp_path / "test.h5ad"


Expand Down
Binary file added tests/data/archives/v0.11.4/adata.h5ad
Binary file not shown.
Binary file added tests/data/archives/v0.11.4/adata.zarr.zip
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/data/archives/v0.11.4/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
These files were written with

```bash
uvx '--with=anndata==0.11.4' '--with=zarr<3' python -c '
import zarr
from anndata import AnnData
adata = AnnData(shape=(10, 20))
adata.write_zarr(zarr.ZipStore("tests/data/archives/v0.11.4/adata.zarr.zip"))
adata.write_h5ad("tests/data/archives/v0.11.4/adata.h5ad")'
```
2 changes: 1 addition & 1 deletion tests/test_backed_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense):

# this is very similar to the views test
@pytest.mark.filterwarnings("ignore::anndata.ImplicitModificationWarning")
def test_backing(adata, tmp_path, backing_h5ad):
def test_backing(adata: ad.AnnData, tmp_path: Path, backing_h5ad: Path) -> None:
assert not adata.isbacked

adata.filename = backing_h5ad
Expand Down
39 changes: 31 additions & 8 deletions tests/test_io_backwards_compat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from pathlib import Path
from subprocess import run
from typing import TYPE_CHECKING

import pandas as pd
import pytest
Expand All @@ -12,25 +14,46 @@
from anndata.compat import is_zarr_v2
from anndata.tests.helpers import assert_equal

if TYPE_CHECKING:
from typing import Literal

ARCHIVE_PTH = Path(__file__).parent / "data/archives"


@pytest.fixture(params=list(ARCHIVE_PTH.glob("v*")), ids=lambda x: x.name)
def archive_dir(request):
def archive_dir(request: pytest.FixtureRequest) -> Path:
return request.param


def test_backwards_compat_files(archive_dir) -> None:
with pytest.warns(ad.OldFormatWarning):
from_h5ad = ad.read_h5ad(archive_dir / "adata.h5ad")
path = archive_dir / "adata.zarr.zip"
store = path if is_zarr_v2() else zarr.storage.ZipStore(path)
with pytest.warns(ad.OldFormatWarning):
from_zarr = ad.read_zarr(store)
def read_archive(
archive_dir: Path, format: Literal["h5ad", "zarr"]
) -> tuple[ad.AnnData, Path]:
if format == "h5ad":
path = archive_dir / "adata.h5ad"
return ad.read_h5ad(path), path
if format == "zarr":
path = archive_dir / "adata.zarr.zip"
store = path if is_zarr_v2() else zarr.storage.ZipStore(path)
return ad.read_zarr(store), path
pytest.fail(f"Unknown format: {format}")


@pytest.mark.filterwarnings("ignore::anndata.OldFormatWarning")
def test_backwards_compat_files(archive_dir: Path) -> None:
from_h5ad, _ = read_archive(archive_dir, "h5ad")
from_zarr, _ = read_archive(archive_dir, "zarr")
assert_equal(from_h5ad, from_zarr, exact=True)


def test_no_diff(tmp_path: Path, archive_dir: Path) -> None:
if archive_dir.name in {"v0.7.8", "v0.7.0"}:
pytest.skip("DataFrame encoding changed between 0.7 and now")
adata, in_path = read_archive(archive_dir, "h5ad")
adata.write_h5ad(out_path := tmp_path / "adata.h5ad")
diff_proc = run(["h5diff", in_path, out_path], check=False)
assert diff_proc.returncode == 0


def test_clean_uns_backwards_compat(tmp_path, diskfmt):
pth = tmp_path / f"test_write.{diskfmt}"
write = lambda x, y: getattr(x, f"write_{diskfmt}")(y)
Expand Down
30 changes: 26 additions & 4 deletions tests/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
import warnings
from contextlib import contextmanager
from contextlib import contextmanager, nullcontext
from functools import partial
from importlib.util import find_spec
from pathlib import Path
Expand Down Expand Up @@ -38,6 +38,7 @@
)

if TYPE_CHECKING:
from collections.abc import Generator
from typing import Literal

HERE = Path(__file__).parent
Expand Down Expand Up @@ -100,6 +101,15 @@ def rw(backing_h5ad):
return curr, orig


@contextmanager
def open_store(
path: Path, diskfmt: Literal["h5ad", "zarr"]
) -> Generator[h5py.File | zarr.Group, None, None]:
f = zarr.open_group(path) if diskfmt == "zarr" else h5py.File(path, "r")
with f if isinstance(f, h5py.File) else nullcontext():
yield f


@pytest.fixture(params=[np.uint8, np.int32, np.int64, np.float32, np.float64])
def dtype(request):
return request.param
Expand Down Expand Up @@ -403,10 +413,10 @@ def check_compressed(value, key):
not_compressed.append(key)

if is_zarr_v2():
with zarr.open(str(pth), "r") as f:
with zarr.open(pth, "r") as f:
f.visititems(check_compressed)
else:
f = zarr.open(str(pth), mode="r")
f = zarr.open(pth, mode="r")
for key, value in f.members(max_depth=None):
check_compressed(value, key)

Expand Down Expand Up @@ -765,12 +775,24 @@ def test_zarr_chunk_X(tmp_path):
adata = gen_adata((100, 100), X_type=np.array, **GEN_ADATA_NO_XARRAY_ARGS)
adata.write_zarr(zarr_pth, chunks=(10, 10))

z = zarr.open(str(zarr_pth)) # As of v2.3.2 zarr won’t take a Path
z = zarr.open(zarr_pth)
assert z["X"].chunks == (10, 10)
from_zarr = ad.read_zarr(zarr_pth)
assert_equal(from_zarr, adata)


def test_write_x_none(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]) -> None:
adata = ad.AnnData(shape=(10, 10), obs={"a": np.ones(10)}, var={"b": np.ones(10)})
p = tmp_path / f"adata.{diskfmt}"
write = getattr(adata, f"write_{diskfmt}")

write(p)
with open_store(p, diskfmt) as f:
root_keys = list(f.keys())

assert "X" not in root_keys


################################
# Round-tripping scanpy datasets
################################
Expand Down
Loading