diff --git a/docs/release-notes/2399.fix.md b/docs/release-notes/2399.fix.md new file mode 100644 index 000000000..9c3ccafc1 --- /dev/null +++ b/docs/release-notes/2399.fix.md @@ -0,0 +1 @@ +Disallow {meth}`anndata.AnnData.transpose` when `X` or `layers` contains {class}`h5py.Dataset`, {class}`zarr.Array`, {class}`anndata.abc.CSRDataset`, or {class}`anndata.abc.CSCDataset` {user}`ilan-gold`. diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 3c8574436..ae9b091be 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -1231,6 +1231,12 @@ def transpose(self) -> AnnData: "which is currently not implemented. Call `.copy()` before transposing." ) raise ValueError(msg) + if any( + isinstance(elem, ZarrArray | BaseCompressedSparseDataset | h5py.Dataset) + for elem in (self.X, *self.layers.values()) + ): + msg = "Cannot transpose anndata object that has raw zarr arrays or h5py arrays backing X or layers" + raise ValueError(msg) return AnnData( X=_safe_transpose(X) if X is not None else None, diff --git a/tests/test_base.py b/tests/test_base.py index 2c3ec4d11..d40fcd4e7 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -5,9 +5,11 @@ from itertools import product from typing import TYPE_CHECKING +import h5py import numpy as np import pandas as pd import pytest +import zarr from numpy import ma from scipy import sparse as sp from scipy.sparse import csr_matrix, issparse @@ -15,6 +17,7 @@ import anndata as ad from anndata import AnnData, ImplicitModificationWarning from anndata._core.raw import Raw +from anndata._core.sparse_dataset import sparse_dataset from anndata._settings import settings from anndata.acc import A from anndata.tests.helpers import ( @@ -834,3 +837,23 @@ def test_create_adata_from_single_axis_elem( in_memory.write_h5ad(tmp_path / "adata.h5ad") from_disk = ad.read_h5ad(tmp_path / "adata.h5ad") assert_equal(from_disk, in_memory) + + +@pytest.mark.parametrize("in_x", [True, False], ids=["X", 
"layers"]) +@pytest.mark.parametrize("is_sparse", [True, False], ids=["sparse", "dense"]) +@pytest.mark.parametrize("storage", ["h5ad", "zarr"]) +def test_transpose_errors_with_backed_arrays( + tmp_path: Path, storage: str, *, is_sparse: bool, in_x: bool +): + adata = AnnData(X=csr_matrix(np.ones((3, 4))) if is_sparse else np.ones((3, 4))) + path = tmp_path / f"test.{storage}" + getattr(adata, f"write_{storage}")(path) + f = (h5py.File if storage == "h5ad" else zarr.open)(path) + raw_array = sparse_dataset(f["X"]) if is_sparse else f["X"] + + adata = AnnData(**({"X": raw_array} if in_x else {"layers": {"test": raw_array}})) + + with pytest.raises(ValueError, match=r"Cannot transpose anndata object"): + adata.transpose() + if storage == "h5ad": + f.close()