diff --git a/.github/workflows/test-cpu.yml b/.github/workflows/test-cpu.yml index 25453894d..4350db1d1 100644 --- a/.github/workflows/test-cpu.yml +++ b/.github/workflows/test-cpu.yml @@ -56,6 +56,9 @@ jobs: fetch-depth: 0 filter: blob:none + - name: Install system dependencies + run: sudo apt install -y hdf5-tools + - name: Set up Python ${{ matrix.env.python }} uses: actions/setup-python@v5 with: diff --git a/docs/release-notes/2054.bugfix.md b/docs/release-notes/2054.bugfix.md new file mode 100644 index 000000000..62a9b9a1d --- /dev/null +++ b/docs/release-notes/2054.bugfix.md @@ -0,0 +1 @@ +Revert accidental change where {attr}`~anndata.AnnData.X` got written to disk when it was `None` {user}`flying-sheep` diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index ec27a3083..6c33e1ba9 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -4,7 +4,7 @@ from functools import partial from pathlib import Path from types import MappingProxyType -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, TypeVar, cast from warnings import warn import h5py @@ -36,11 +36,12 @@ ) if TYPE_CHECKING: - from collections.abc import Callable, Collection, Mapping, Sequence + from collections.abc import Callable, Collection, Container, Mapping, Sequence from os import PathLike from typing import Any, Literal from .._core.file_backing import AnnDataFileManager + from .._core.raw import Raw T = TypeVar("T") @@ -82,29 +83,18 @@ def write_h5ad( # TODO: Use spec writing system for this # Currently can't use write_dispatched here because this function is also called to do an # inplace update of a backed object, which would delete "/" - f = f["/"] + f = cast("h5py.Group", f["/"]) f.attrs.setdefault("encoding-type", "anndata") f.attrs.setdefault("encoding-version", "0.1.0") - if "X" in as_dense and isinstance( - adata.X, CSMatrix | BaseCompressedSparseDataset - ): - write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) - elif not (adata.isbacked and Path(adata.filename) == Path(filepath)): - # If adata.isbacked, X should already be up to date - write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) - if "raw/X" in as_dense and isinstance( - adata.raw.X, CSMatrix | BaseCompressedSparseDataset - ): - write_sparse_as_dense( - f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs - ) - write_elem(f, "raw/var", adata.raw.var, dataset_kwargs=dataset_kwargs) - write_elem( - f, "raw/varm", dict(adata.raw.varm), dataset_kwargs=dataset_kwargs - ) - elif adata.raw is not None: - write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs) + _write_x( + f, + adata, # accessing adata.X reopens adata.file if it’s backed + is_backed=adata.isbacked and adata.filename == filepath, + as_dense=as_dense, + dataset_kwargs=dataset_kwargs, + ) + _write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs) write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs) write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs) write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs) @@ -115,6 +105,41 @@ def write_h5ad( write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs) +def _write_x( + f: h5py.Group, + adata: AnnData, + *, + is_backed: bool, + as_dense: Container[str], + dataset_kwargs: Mapping[str, Any], +) -> None: + if "X" in as_dense and isinstance(adata.X, CSMatrix | BaseCompressedSparseDataset): + write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) + elif is_backed: + pass # If adata.isbacked, X should already be up to date + elif adata.X is None: + f.pop("X", None) + else: + write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) + + +def _write_raw( + f: h5py.Group, + raw: Raw, + *, + as_dense: Container[str], + dataset_kwargs: Mapping[str, Any], +) -> None: + if "raw/X" in as_dense and isinstance( + raw.X, CSMatrix | BaseCompressedSparseDataset + ): + write_sparse_as_dense(f, "raw/X", raw.X, dataset_kwargs=dataset_kwargs) + write_elem(f, "raw/var", raw.var, dataset_kwargs=dataset_kwargs) + write_elem(f, "raw/varm", dict(raw.varm), dataset_kwargs=dataset_kwargs) + elif raw is not None: + write_elem(f, "raw", raw, dataset_kwargs=dataset_kwargs) + + @report_write_key_on_error @write_spec(IOSpec("array", "0.2.0")) def write_sparse_as_dense( diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 9def0ee7a..c154f5776 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -275,7 +275,8 @@ def write_anndata( dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ): g = f.require_group(k) - _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs) + if adata.X is not None: + _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs) _writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs) _writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs) _writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs) diff --git a/src/anndata/_io/zarr.py b/src/anndata/_io/zarr.py index 3b9667300..dbfdc9483 100644 --- a/src/anndata/_io/zarr.py +++ b/src/anndata/_io/zarr.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path from typing import TYPE_CHECKING, TypeVar from warnings import warn @@ -37,8 +36,6 @@ def write_zarr( **ds_kwargs, ) -> None: """See :meth:`~anndata.AnnData.write_zarr`.""" - if isinstance(store, Path): - store = str(store) if convert_strings_to_categoricals: adata.strings_to_categoricals() if adata.raw is not None: @@ -75,9 +72,6 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnDa store The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class. """ - if isinstance(store, Path): - store = str(store) - f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r") # Read with handling for backwards compat diff --git a/tests/conftest.py b/tests/conftest.py index 39bbb067f..8441dc940 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,11 +24,12 @@ if TYPE_CHECKING: from collections.abc import Generator + from pathlib import Path from types import EllipsisType @pytest.fixture -def backing_h5ad(tmp_path): +def backing_h5ad(tmp_path: Path) -> Path: return tmp_path / "test.h5ad" diff --git a/tests/data/archives/v0.11.4/adata.h5ad b/tests/data/archives/v0.11.4/adata.h5ad new file mode 100644 index 000000000..1b5f4a213 Binary files /dev/null and b/tests/data/archives/v0.11.4/adata.h5ad differ diff --git a/tests/data/archives/v0.11.4/adata.zarr.zip b/tests/data/archives/v0.11.4/adata.zarr.zip new file mode 100644 index 000000000..b03479569 Binary files /dev/null and b/tests/data/archives/v0.11.4/adata.zarr.zip differ diff --git a/tests/data/archives/v0.11.4/readme.md b/tests/data/archives/v0.11.4/readme.md new file mode 100644 index 000000000..259306320 --- /dev/null +++ b/tests/data/archives/v0.11.4/readme.md @@ -0,0 +1,10 @@ +These files were written with + +```bash +uvx '--with=anndata==0.11.4' '--with=zarr<3' python -c ' +import zarr +from anndata import AnnData +adata = AnnData(shape=(10, 20)) +adata.write_zarr(zarr.ZipStore("tests/data/archives/v0.11.4/adata.zarr.zip")) +adata.write_h5ad("tests/data/archives/v0.11.4/adata.h5ad")' +``` diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 40f9e4bf3..3b8305dce 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -108,7 +108,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense): # this is very similar to the views test @pytest.mark.filterwarnings("ignore::anndata.ImplicitModificationWarning") -def test_backing(adata, tmp_path, backing_h5ad): +def test_backing(adata: ad.AnnData, tmp_path: Path, backing_h5ad: Path) -> None: assert not adata.isbacked adata.filename = backing_h5ad diff --git a/tests/test_io_backwards_compat.py b/tests/test_io_backwards_compat.py index 508734bd0..1b6f15cca 100644 --- a/tests/test_io_backwards_compat.py +++ b/tests/test_io_backwards_compat.py @@ -1,6 +1,8 @@ from __future__ import annotations from pathlib import Path +from subprocess import run +from typing import TYPE_CHECKING import pandas as pd import pytest @@ -12,25 +14,46 @@ from anndata.compat import is_zarr_v2 from anndata.tests.helpers import assert_equal +if TYPE_CHECKING: + from typing import Literal + ARCHIVE_PTH = Path(__file__).parent / "data/archives" @pytest.fixture(params=list(ARCHIVE_PTH.glob("v*")), ids=lambda x: x.name) -def archive_dir(request): +def archive_dir(request: pytest.FixtureRequest) -> Path: return request.param -def test_backwards_compat_files(archive_dir) -> None: - with pytest.warns(ad.OldFormatWarning): - from_h5ad = ad.read_h5ad(archive_dir / "adata.h5ad") - path = archive_dir / "adata.zarr.zip" - store = path if is_zarr_v2() else zarr.storage.ZipStore(path) - with pytest.warns(ad.OldFormatWarning): - from_zarr = ad.read_zarr(store) +def read_archive( + archive_dir: Path, format: Literal["h5ad", "zarr"] +) -> tuple[ad.AnnData, Path]: + if format == "h5ad": + path = archive_dir / "adata.h5ad" + return ad.read_h5ad(path), path + if format == "zarr": + path = archive_dir / "adata.zarr.zip" + store = path if is_zarr_v2() else zarr.storage.ZipStore(path) + return ad.read_zarr(store), path + pytest.fail(f"Unknown format: {format}") + +@pytest.mark.filterwarnings("ignore::anndata.OldFormatWarning") +def test_backwards_compat_files(archive_dir: Path) -> None: + from_h5ad, _ = read_archive(archive_dir, "h5ad") + from_zarr, _ = read_archive(archive_dir, "zarr") assert_equal(from_h5ad, from_zarr, exact=True) +def test_no_diff(tmp_path: Path, archive_dir: Path) -> None: + if archive_dir.name in {"v0.7.8", "v0.7.0"}: + pytest.skip("DataFrame encoding changed between 0.7 and now") + adata, in_path = read_archive(archive_dir, "h5ad") + adata.write_h5ad(out_path := tmp_path / "adata.h5ad") + diff_proc = run(["h5diff", in_path, out_path], check=False) + assert diff_proc.returncode == 0 + + def test_clean_uns_backwards_compat(tmp_path, diskfmt): pth = tmp_path / f"test_write.{diskfmt}" write = lambda x, y: getattr(x, f"write_{diskfmt}")(y) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index d3d2d46d3..7039af709 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -2,7 +2,7 @@ import re import warnings -from contextlib import contextmanager +from contextlib import contextmanager, nullcontext from functools import partial from importlib.util import find_spec from pathlib import Path @@ -38,6 +38,7 @@ ) if TYPE_CHECKING: + from collections.abc import Generator from typing import Literal HERE = Path(__file__).parent @@ -100,6 +101,15 @@ def rw(backing_h5ad): return curr, orig +@contextmanager +def open_store( + path: Path, diskfmt: Literal["h5ad", "zarr"] +) -> Generator[h5py.File | zarr.Group, None, None]: + f = zarr.open_group(path) if diskfmt == "zarr" else h5py.File(path, "r") + with f if isinstance(f, h5py.File) else nullcontext(): + yield f + + @pytest.fixture(params=[np.uint8, np.int32, np.int64, np.float32, np.float64]) def dtype(request): return request.param @@ -403,10 +413,10 @@ def check_compressed(value, key): not_compressed.append(key) if is_zarr_v2(): - with zarr.open(str(pth), "r") as f: + with zarr.open(pth, "r") as f: f.visititems(check_compressed) else: - f = zarr.open(str(pth), mode="r") + f = zarr.open(pth, mode="r") for key, value in f.members(max_depth=None): check_compressed(value, key) @@ -765,12 +775,24 @@ def test_zarr_chunk_X(tmp_path): adata = gen_adata((100, 100), X_type=np.array, **GEN_ADATA_NO_XARRAY_ARGS) adata.write_zarr(zarr_pth, chunks=(10, 10)) - z = zarr.open(str(zarr_pth)) # As of v2.3.2 zarr won’t take a Path + z = zarr.open(zarr_pth) assert z["X"].chunks == (10, 10) from_zarr = ad.read_zarr(zarr_pth) assert_equal(from_zarr, adata) +def test_write_x_none(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]) -> None: + adata = ad.AnnData(shape=(10, 10), obs={"a": np.ones(10)}, var={"b": np.ones(10)}) + p = tmp_path / f"adata.{diskfmt}" + write = getattr(adata, f"write_{diskfmt}") + + write(p) + with open_store(p, diskfmt) as f: + root_keys = list(f.keys()) + + assert "X" not in root_keys + + ################################ # Round-tripping scanpy datasets ################################