From 9f48dfc344adcc67532f5ba8f0ee6f576b319be1 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 10:23:58 +0100 Subject: [PATCH 1/8] backport sparray tightening --- src/anndata/_core/anndata.py | 8 ++++---- src/anndata/_core/index.py | 10 +++++----- src/anndata/_core/merge.py | 28 +++++++++++++-------------- src/anndata/_core/sparse_dataset.py | 16 +++++++-------- src/anndata/_io/specs/lazy_methods.py | 4 ++-- src/anndata/_io/specs/methods.py | 10 +++++----- src/anndata/abc.py | 6 +++--- src/anndata/compat/__init__.py | 12 ++++++------ src/anndata/tests/helpers.py | 6 +++--- src/anndata/typing.py | 4 ++-- src/anndata/utils.py | 4 ++-- tests/test_backed_hdf5.py | 6 +++--- tests/test_backed_sparse.py | 4 ++-- tests/test_concatenate.py | 8 ++++---- tests/test_io_dispatched.py | 4 ++-- tests/test_io_elementwise.py | 4 ++-- tests/test_readwrite.py | 4 ++-- 17 files changed, 69 insertions(+), 69 deletions(-) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 82ca753a7..48ee0086e 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -26,7 +26,7 @@ from .. import utils from .._settings import settings -from ..compat import DaskArray, SpArray, ZarrArray, _move_adj_mtx +from ..compat import CSArray, DaskArray, ZarrArray, _move_adj_mtx from ..logging import anndata_logger as logger from ..utils import ( axis_len, @@ -557,7 +557,7 @@ def X(self) -> ArrayDataStructureType | None: # return X @X.setter - def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): + def X(self, value: np.ndarray | sparse.spmatrix | CSArray | None): if value is None: if self.isbacked: msg = "Cannot currently remove data matrix from backed object." @@ -615,7 +615,7 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): if sparse.issparse(self._adata_ref._X) and isinstance( value, np.ndarray ): - if isinstance(self._adata_ref.X, SpArray): + if isinstance(self._adata_ref.X, CSArray): memory_class = sparse.coo_array else: memory_class = sparse.coo_matrix @@ -1705,7 +1705,7 @@ def concatenate( # Backwards compat (some of this could be more efficient) # obs used to always be an outer join sparse_class = sparse.csr_matrix - if any(isinstance(a.X, SpArray) for a in all_adatas): + if any(isinstance(a.X, CSArray) for a in all_adatas): sparse_class = sparse.csr_array out.obs = concat( [AnnData(sparse_class(a.shape), obs=a.obs) for a in all_adatas], diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 32f69f182..3f59450f4 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -10,7 +10,7 @@ import pandas as pd from scipy.sparse import issparse, spmatrix -from ..compat import AwkArray, DaskArray, SpArray +from ..compat import AwkArray, CSArray, DaskArray if TYPE_CHECKING: from ..compat import Index, Index1D @@ -69,13 +69,13 @@ def name_idx(i): elif isinstance(indexer, str): return index.get_loc(indexer) # int elif isinstance( - indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray + indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | CSArray ): if hasattr(indexer, "shape") and ( (indexer.shape == (index.shape[0], 1)) or (indexer.shape == (1, index.shape[0])) ): - if isinstance(indexer, spmatrix | SpArray): + if isinstance(indexer, spmatrix | CSArray): indexer = indexer.toarray() indexer = np.ravel(indexer) if not isinstance(indexer, np.ndarray | pd.Index): @@ -181,8 +181,8 @@ def _subset_dask(a: DaskArray, subset_idx: Index): @_subset.register(spmatrix) -@_subset.register(SpArray) -def _subset_sparse(a: spmatrix | SpArray, subset_idx: Index): +@_subset.register(CSArray) +def _subset_sparse(a: spmatrix | CSArray, subset_idx: Index): # Correcting for indexing behaviour of sparse.spmatrix if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx): first_idx = subset_idx[0] diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index fb1c4ef07..c3e2e3eb7 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -24,11 +24,11 @@ from ..compat import ( CAN_USE_SPARSE_ARRAY, AwkArray, + CSArray, CupyArray, CupyCSRMatrix, CupySparseMatrix, DaskArray, - SpArray, _map_cat_to_str, ) from ..utils import asarray, axis_len, warn_once @@ -166,7 +166,7 @@ def equal_series(a, b) -> bool: @equal.register(sparse.spmatrix) -@equal.register(SpArray) +@equal.register(CSArray) @equal.register(CupySparseMatrix) def equal_sparse(a, b) -> bool: # It's a weird api, don't blame me @@ -174,7 +174,7 @@ def equal_sparse(a, b) -> bool: xp = array_api_compat.array_namespace(a.data) - if isinstance(b, CupySparseMatrix | sparse.spmatrix | SpArray): + if isinstance(b, CupySparseMatrix | sparse.spmatrix | CSArray): if isinstance(a, CupySparseMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 @@ -206,7 +206,7 @@ def equal_awkward(a, b) -> bool: def as_sparse(x, use_sparse_array=False): - if not isinstance(x, sparse.spmatrix | SpArray): + if not isinstance(x, sparse.spmatrix | CSArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) return sparse.csr_matrix(x) @@ -537,7 +537,7 @@ def apply(self, el, *, axis, fill_value=None): return el if isinstance(el, pd.DataFrame): return self._apply_to_df(el, axis=axis, fill_value=fill_value) - elif isinstance(el, sparse.spmatrix | SpArray | CupySparseMatrix): + elif isinstance(el, sparse.spmatrix | CSArray | CupySparseMatrix): return self._apply_to_sparse(el, axis=axis, fill_value=fill_value) elif isinstance(el, AwkArray): return self._apply_to_awkward(el, axis=axis, fill_value=fill_value) @@ -615,7 +615,7 @@ def _apply_to_array(self, el, *, axis, fill_value=None): ) def _apply_to_sparse( - self, el: sparse.spmatrix | SpArray, *, axis, fill_value=None + self, el: sparse.spmatrix | CSArray, *, axis, fill_value=None ) -> spmatrix: if isinstance(el, CupySparseMatrix): from cupyx.scipy import sparse @@ -638,7 +638,7 @@ def _apply_to_sparse( shape[axis] = len(self.new_idx) shape = tuple(shape) if fill_value == 0: - if isinstance(el, SpArray): + if isinstance(el, CSArray): memory_class = sparse.csr_array else: memory_class = sparse.csr_matrix @@ -652,7 +652,7 @@ def _apply_to_sparse( idxmtx_dtype = xp.promote_types(el.dtype, xp.array(fill_value).dtype) else: idxmtx_dtype = bool - if isinstance(el, SpArray): + if isinstance(el, CSArray): memory_class = sparse.coo_array else: memory_class = sparse.coo_matrix @@ -730,10 +730,10 @@ def default_fill_value(els): This is largely due to backwards compat, and might not be the ideal solution. """ if any( - isinstance(el, sparse.spmatrix | SpArray) + isinstance(el, sparse.spmatrix | CSArray) or ( isinstance(el, DaskArray) - and isinstance(el._meta, sparse.spmatrix | SpArray) + and isinstance(el._meta, sparse.spmatrix | CSArray) ) for el in els ): @@ -830,9 +830,9 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ], axis=axis, ) - elif any(isinstance(a, sparse.spmatrix | SpArray) for a in arrays): + elif any(isinstance(a, sparse.spmatrix | CSArray) for a in arrays): sparse_stack = (sparse.vstack, sparse.hstack)[axis] - use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays) + use_sparse_array = any(issubclass(type(a), CSArray) for a in arrays) return sparse_stack( [ f( @@ -941,7 +941,7 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): def missing_element( n: int, - els: list[SpArray | sparse.csr_matrix | sparse.csc_matrix | np.ndarray | DaskArray], + els: list[CSArray | sparse.csr_matrix | sparse.csc_matrix | np.ndarray | DaskArray], axis: Literal[0, 1] = 0, fill_value: Any | None = None, off_axis_size: int = 0, @@ -1006,7 +1006,7 @@ def concat_pairwise_mapping( mappings: Collection[Mapping], shapes: Collection[int], join_keys=intersect_keys ): result = {} - if any(any(isinstance(v, SpArray) for v in m.values()) for m in mappings): + if any(any(isinstance(v, CSArray) for v in m.values()) for m in mappings): sparse_class = sparse.csr_array else: sparse_class = sparse.csr_matrix diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index 1b9eabb5d..b47b6f9f3 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -30,7 +30,7 @@ from .. import abc from .._settings import settings -from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr +from ..compat import CSArray, H5Group, ZarrArray, ZarrGroup, _read_attr from .index import _fix_slice_bounds, _subset, unpack_index if TYPE_CHECKING: @@ -327,7 +327,7 @@ def get_memory_class( ) -> type[_cs_matrix]: for fmt, _, memory_class in FORMATS: if format == fmt: - if use_sparray_in_io and issubclass(memory_class, SpArray): + if use_sparray_in_io and issubclass(memory_class, CSArray): return memory_class elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): return memory_class @@ -340,7 +340,7 @@ def get_backed_class( ) -> type[BackedSparseMatrix]: for fmt, backed_class, _ in FORMATS: if format == fmt: - if use_sparray_in_io and issubclass(backed_class, SpArray): + if use_sparray_in_io and issubclass(backed_class, CSArray): return backed_class elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): return backed_class @@ -435,7 +435,7 @@ def __repr__(self) -> str: def __getitem__( self, index: Index | tuple[()] - ) -> float | ss.csr_matrix | ss.csc_matrix | SpArray: + ) -> float | ss.csr_matrix | ss.csc_matrix | CSArray: indices = self._normalize_index(index) row, col = indices mtx = self._to_backed() @@ -466,8 +466,8 @@ def __getitem__( mtx_fmt = get_memory_class( self.format, use_sparray_in_io=settings.use_sparse_array_on_read ) - must_convert_to_array = issubclass(mtx_fmt, SpArray) and not isinstance( - sub, SpArray + must_convert_to_array = issubclass(mtx_fmt, CSArray) and not isinstance( + sub, CSArray ) if isinstance(sub, BackedSparseMatrix) or must_convert_to_array: return mtx_fmt(sub) @@ -494,7 +494,7 @@ def __setitem__(self, index: Index | tuple[()], value) -> None: mock_matrix[row, col] = value # TODO: split to other classes? - def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None: + def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | CSArray) -> None: """Append an in-memory or on-disk sparse matrix to the current object's store. Parameters @@ -620,7 +620,7 @@ def _to_backed(self) -> BackedSparseMatrix: mtx.indptr = self._indptr return mtx - def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | SpArray: + def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | CSArray: format_class = get_memory_class( self.format, use_sparray_in_io=settings.use_sparse_array_on_read ) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 68b588ac7..b04aa6769 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -20,7 +20,7 @@ from collections.abc import Generator, Mapping, Sequence from typing import Literal, ParamSpec, TypeVar - from ...compat import DaskArray, H5File, SpArray + from ...compat import CSArray, DaskArray, H5File from .registry import DaskReader BlockInfo = Mapping[ @@ -72,7 +72,7 @@ def make_dask_chunk( path_or_sparse_dataset: Path | D, elem_name: str, block_info: BlockInfo | None = None, -) -> sparse.csr_matrix | sparse.csc_matrix | SpArray: +) -> sparse.csr_matrix | sparse.csc_matrix | CSArray: if block_info is None: msg = "Block info is required" raise ValueError(msg) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index b8357665f..ae5a14276 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -52,7 +52,7 @@ from numpy.typing import NDArray from anndata._types import ArrayStorageType, GroupStorageType - from anndata.compat import SpArray + from anndata.compat import CSArray from anndata.typing import AxisStorable, InMemoryArrayOrScalarType from .registry import Reader, Writer @@ -127,7 +127,7 @@ def wrapper( @_REGISTRY.register_read(H5Array, IOSpec("", "")) def read_basic( elem: H5File | H5Group | H5Array, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | CSArray: from anndata._io import h5ad warn( @@ -149,7 +149,7 @@ def read_basic( @_REGISTRY.register_read(ZarrArray, IOSpec("", "")) def read_basic_zarr( elem: ZarrGroup | ZarrArray, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | CSArray: from anndata._io import zarr warn( @@ -590,7 +590,7 @@ def write_recarray_zarr( def write_sparse_compressed( f: GroupStorageType, key: str, - value: sparse.spmatrix | SpArray, + value: sparse.spmatrix | CSArray, *, _writer: Writer, fmt: Literal["csr", "csc"], @@ -758,7 +758,7 @@ def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]: @_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0")) def read_sparse( elem: GroupStorageType, *, _reader: Reader -) -> sparse.spmatrix | SpArray: +) -> sparse.spmatrix | CSArray: return sparse_dataset(elem).to_memory() diff --git a/src/anndata/abc.py b/src/anndata/abc.py index df8c8a6e8..1b3bb88a9 100644 --- a/src/anndata/abc.py +++ b/src/anndata/abc.py @@ -9,7 +9,7 @@ import numpy as np from scipy.sparse import csc_matrix, csr_matrix - from .compat import Index, SpArray + from .compat import CSArray, Index __all__ = ["CSRDataset", "CSCDataset"] @@ -31,7 +31,7 @@ class _AbstractCSDataset(ABC): """Which file type is used on-disk.""" @abstractmethod - def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | SpArray: + def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | CSArray: """Load a slice or an element from the sparse dataset into memory. Parameters @@ -45,7 +45,7 @@ def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | SpArray """ @abstractmethod - def to_memory(self) -> csr_matrix | csc_matrix | SpArray: + def to_memory(self) -> csr_matrix | csc_matrix | CSArray: """Load the sparse dataset into memory. Returns diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 8d2312504..a60d40619 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -33,14 +33,14 @@ CAN_USE_SPARSE_ARRAY = Version(scipy.__version__) >= Version("1.11") -if not CAN_USE_SPARSE_ARRAY: +if TYPE_CHECKING or CAN_USE_SPARSE_ARRAY: + CSArray = scipy.sparse.csr_array | scipy.sparse.csc_array +else: - class SpArray: + class CSArray: @staticmethod def __repr__(): - return "mock scipy.sparse.sparray" -else: - SpArray = scipy.sparse.sparray + return "mock scipy.sparse._cs_array" class Empty: @@ -57,7 +57,7 @@ class Empty: | tuple[EllipsisType, Index1D, Index1D] | tuple[Index1D, EllipsisType, Index1D] | scipy.sparse.spmatrix - | SpArray + | CSArray ) H5Group = h5py.Group H5Array = h5py.Dataset diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index e7fe10b03..71e3b4785 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -26,12 +26,12 @@ from anndata.compat import ( CAN_USE_SPARSE_ARRAY, AwkArray, + CSArray, CupyArray, CupyCSCMatrix, CupyCSRMatrix, CupySparseMatrix, DaskArray, - SpArray, ZarrArray, ) from anndata.utils import asarray @@ -603,7 +603,7 @@ def assert_equal_sparse(a, b, exact=False, elem_name=None): assert_equal(b, a, exact, elem_name=elem_name) -@assert_equal.register(SpArray) +@assert_equal.register(CSArray) def assert_equal_sparse_array(a, b, exact=False, elem_name=None): return assert_equal_sparse(a, b, exact, elem_name) @@ -808,7 +808,7 @@ def _(a): return da.from_array(a, _half_chunk_size(a.shape)) -@as_sparse_dask_array.register(SpArray) +@as_sparse_dask_array.register(CSArray) def _(a): import dask.array as da diff --git a/src/anndata/typing.py b/src/anndata/typing.py index d13927bad..f3df44fa2 100644 --- a/src/anndata/typing.py +++ b/src/anndata/typing.py @@ -11,11 +11,11 @@ from ._core.anndata import AnnData from .compat import ( AwkArray, + CSArray, CupyArray, CupySparseMatrix, DaskArray, H5Array, - SpArray, ZappyArray, ZarrArray, ) @@ -37,7 +37,7 @@ | ma.MaskedArray | sparse.csr_matrix | sparse.csc_matrix - | SpArray + | CSArray | AwkArray | H5Array | ZarrArray diff --git a/src/anndata/utils.py b/src/anndata/utils.py index d6ce3523c..694084a1e 100644 --- a/src/anndata/utils.py +++ b/src/anndata/utils.py @@ -13,7 +13,7 @@ import anndata from ._core.sparse_dataset import BaseCompressedSparseDataset -from .compat import CupyArray, CupySparseMatrix, DaskArray, SpArray +from .compat import CSArray, CupyArray, CupySparseMatrix, DaskArray from .logging import get_logger if TYPE_CHECKING: @@ -48,7 +48,7 @@ def asarray(x): return np.asarray(x) -@asarray.register(SpArray) +@asarray.register(CSArray) @asarray.register(sparse.spmatrix) def asarray_sparse(x): return x.toarray() diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 2b584ad67..30c1d09ae 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -10,7 +10,7 @@ from scipy import sparse import anndata as ad -from anndata.compat import SpArray +from anndata.compat import CSArray from anndata.tests.helpers import ( GEN_ADATA_DASK_ARGS, as_dense_dask_array, @@ -200,8 +200,8 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): var_idx = subset_func2(mem_adata.var_names) if ( array_type is asarray - and isinstance(obs_idx, list | np.ndarray | sparse.spmatrix | SpArray) - and isinstance(var_idx, list | np.ndarray | sparse.spmatrix | SpArray) + and isinstance(obs_idx, list | np.ndarray | sparse.spmatrix | CSArray) + and isinstance(var_idx, list | np.ndarray | sparse.spmatrix | CSArray) ): pytest.xfail( "Fancy indexing does not work with multiple arrays on a h5py.Dataset" diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 499aeadbd..4516d195c 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -14,7 +14,7 @@ from anndata._core.anndata import AnnData from anndata._core.sparse_dataset import sparse_dataset from anndata._io.specs.registry import read_elem_as_dask -from anndata.compat import CAN_USE_SPARSE_ARRAY, DaskArray, SpArray +from anndata.compat import CAN_USE_SPARSE_ARRAY, CSArray, DaskArray from anndata.experimental import read_dispatched from anndata.tests.helpers import AccessTrackingStore, assert_equal, subset_func @@ -337,7 +337,7 @@ def test_read_array( if not CAN_USE_SPARSE_ARRAY: pytest.skip("scipy.sparse.cs{r,c}array not available") ad.settings.use_sparse_array_on_read = True - assert issubclass(type(diskmtx[obs_idx, var_idx]), SpArray) + assert issubclass(type(diskmtx[obs_idx, var_idx]), CSArray) ad.settings.use_sparse_array_on_read = False assert issubclass(type(diskmtx[obs_idx, var_idx]), sparse.spmatrix) diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index 13d7c7617..aabf7f104 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -20,7 +20,7 @@ from anndata import AnnData, Raw, concat from anndata._core import merge from anndata._core.index import _subset -from anndata.compat import AwkArray, CupySparseMatrix, DaskArray, SpArray +from anndata.compat import AwkArray, CSArray, CupySparseMatrix, DaskArray from anndata.tests import helpers from anndata.tests.helpers import ( BASE_MATRIX_PARAMS, @@ -69,7 +69,7 @@ def _filled_sparse(a, fill_value=None): return sparse.csr_matrix(np.broadcast_to(fill_value, a.shape)) -@filled_like.register(SpArray) +@filled_like.register(CSArray) def _filled_sparse_array(a, fill_value=None): return sparse.csr_array(filled_like(sparse.csr_matrix(a))) @@ -197,8 +197,8 @@ def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_com assert_equal(result[orig.obs_names].copy(), orig) base_type = type(orig.X) if sparse.issparse(orig.X): - if isinstance(orig.X, SpArray): - base_type = SpArray + if isinstance(orig.X, CSArray): + base_type = CSArray else: base_type = sparse.spmatrix if isinstance(orig.X, CupySparseMatrix): diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py index 0bbbf285a..b45b63e3c 100644 --- a/tests/test_io_dispatched.py +++ b/tests/test_io_dispatched.py @@ -7,7 +7,7 @@ from scipy import sparse import anndata as ad -from anndata.compat import SpArray +from anndata.compat import CSArray from anndata.experimental import read_dispatched, write_dispatched from anndata.tests.helpers import assert_equal, gen_adata @@ -96,7 +96,7 @@ def set_copy(d, **kwargs): # TODO: Should the passed path be absolute? path = "/" + store.path + "/" + k if hasattr(elem, "shape") and not isinstance( - elem, sparse.spmatrix | SpArray | ad.AnnData + elem, sparse.spmatrix | CSArray | ad.AnnData ): if re.match(r"^/((X)|(layers)).*", path): chunks = (M, N) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 0f5bfb883..be9ce62ed 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -22,7 +22,7 @@ get_spec, ) from anndata._io.specs.registry import IORegistryError -from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray, ZarrGroup, _read_attr +from anndata.compat import CAN_USE_SPARSE_ARRAY, CSArray, ZarrGroup, _read_attr from anndata.experimental import read_elem_as_dask from anndata.io import read_elem, write_elem from anndata.tests.helpers import ( @@ -630,4 +630,4 @@ def test_read_sparse_array( pytest.skip("scipy.sparse.cs{r,c}array not available") ad.settings.use_sparse_array_on_read = True mtx = ad.io.read_elem(f["mtx"]) - assert issubclass(type(mtx), SpArray) + assert issubclass(type(mtx), CSArray) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index f623a3d6a..4c4018a09 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -20,7 +20,7 @@ import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata.compat import DaskArray, SpArray, _read_attr +from anndata.compat import CSArray, DaskArray, _read_attr from anndata.tests.helpers import as_dense_dask_array, assert_equal, gen_adata if TYPE_CHECKING: @@ -159,7 +159,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa # either load as same type or load the convert DaskArray to array # since we tested if assigned types and loaded types are DaskArray # this would also work if they work - if isinstance(adata_src.raw.X, SpArray): + if isinstance(adata_src.raw.X, CSArray): assert isinstance(adata.raw.X, sparse.spmatrix) else: assert isinstance(adata_src.raw.X, type(adata.raw.X) | DaskArray) From c61f1e3d4f4c50598c451b36dd1a5367083a78b4 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 10:27:26 +0100 Subject: [PATCH 2/8] revert singledispatch to sparray --- src/anndata/_core/index.py | 4 ++-- src/anndata/_core/merge.py | 3 ++- src/anndata/compat/__init__.py | 6 ++++++ src/anndata/tests/helpers.py | 6 +++--- src/anndata/utils.py | 4 ++-- tests/test_concatenate.py | 4 ++-- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 3f59450f4..2b186a29d 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -10,7 +10,7 @@ import pandas as pd from scipy.sparse import issparse, spmatrix -from ..compat import AwkArray, CSArray, DaskArray +from ..compat import AwkArray, CSArray, DaskArray, SpArray if TYPE_CHECKING: from ..compat import Index, Index1D @@ -181,7 +181,7 @@ def _subset_dask(a: DaskArray, subset_idx: Index): @_subset.register(spmatrix) -@_subset.register(CSArray) +@_subset.register(SpArray) def _subset_sparse(a: spmatrix | CSArray, subset_idx: Index): # Correcting for indexing behaviour of sparse.spmatrix if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx): diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index c3e2e3eb7..218283304 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -29,6 +29,7 @@ CupyCSRMatrix, CupySparseMatrix, DaskArray, + SpArray, _map_cat_to_str, ) from ..utils import asarray, axis_len, warn_once @@ -166,7 +167,7 @@ def equal_series(a, b) -> bool: @equal.register(sparse.spmatrix) -@equal.register(CSArray) +@equal.register(SpArray) @equal.register(CupySparseMatrix) def equal_sparse(a, b) -> bool: # It's a weird api, don't blame me diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index a60d40619..b257b0d7d 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -34,9 +34,15 @@ CAN_USE_SPARSE_ARRAY = Version(scipy.__version__) >= Version("1.11") if TYPE_CHECKING or CAN_USE_SPARSE_ARRAY: + SpArray = scipy.sparse.sparray CSArray = scipy.sparse.csr_array | scipy.sparse.csc_array else: + class SpArray: + @staticmethod + def __repr__(): + return "mock scipy.sparse.sparray" + class CSArray: @staticmethod def __repr__(): diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 71e3b4785..e7fe10b03 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -26,12 +26,12 @@ from anndata.compat import ( CAN_USE_SPARSE_ARRAY, AwkArray, - CSArray, CupyArray, CupyCSCMatrix, CupyCSRMatrix, CupySparseMatrix, DaskArray, + SpArray, ZarrArray, ) from anndata.utils import asarray @@ -603,7 +603,7 @@ def assert_equal_sparse(a, b, exact=False, elem_name=None): assert_equal(b, a, exact, elem_name=elem_name) -@assert_equal.register(CSArray) +@assert_equal.register(SpArray) def assert_equal_sparse_array(a, b, exact=False, elem_name=None): return assert_equal_sparse(a, b, exact, elem_name) @@ -808,7 +808,7 @@ def _(a): return da.from_array(a, _half_chunk_size(a.shape)) -@as_sparse_dask_array.register(CSArray) +@as_sparse_dask_array.register(SpArray) def _(a): import dask.array as da diff --git a/src/anndata/utils.py b/src/anndata/utils.py index 694084a1e..d6ce3523c 100644 --- a/src/anndata/utils.py +++ b/src/anndata/utils.py @@ -13,7 +13,7 @@ import anndata from ._core.sparse_dataset import BaseCompressedSparseDataset -from .compat import CSArray, CupyArray, CupySparseMatrix, DaskArray +from .compat import CupyArray, CupySparseMatrix, DaskArray, SpArray from .logging import get_logger if TYPE_CHECKING: @@ -48,7 +48,7 @@ def asarray(x): return np.asarray(x) -@asarray.register(CSArray) +@asarray.register(SpArray) @asarray.register(sparse.spmatrix) def asarray_sparse(x): return x.toarray() diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index aabf7f104..a4b3505ee 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -20,7 +20,7 @@ from anndata import AnnData, Raw, concat from anndata._core import merge from anndata._core.index import _subset -from anndata.compat import AwkArray, CSArray, CupySparseMatrix, DaskArray +from anndata.compat import AwkArray, CSArray, CupySparseMatrix, DaskArray, SpArray from anndata.tests import helpers from anndata.tests.helpers import ( BASE_MATRIX_PARAMS, @@ -69,7 +69,7 @@ def _filled_sparse(a, fill_value=None): return sparse.csr_matrix(np.broadcast_to(fill_value, a.shape)) -@filled_like.register(CSArray) +@filled_like.register(SpArray) def _filled_sparse_array(a, fill_value=None): return sparse.csr_array(filled_like(sparse.csr_matrix(a))) From 2198d8b66f7942ec18439790315eb056eadca864 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 10:57:53 +0100 Subject: [PATCH 3/8] matrix too --- src/anndata/_core/aligned_mapping.py | 5 ++--- src/anndata/_core/anndata.py | 8 ++++---- src/anndata/_core/index.py | 8 ++++---- src/anndata/_core/merge.py | 20 +++++++++----------- src/anndata/_core/raw.py | 7 +++---- src/anndata/_core/sparse_dataset.py | 16 +++++++--------- src/anndata/_io/h5ad.py | 13 +++++++------ src/anndata/_io/specs/methods.py | 15 ++++++++------- src/anndata/abc.py | 7 +++---- src/anndata/compat/__init__.py | 3 ++- src/anndata/typing.py | 5 ++--- tests/test_backed_hdf5.py | 6 +++--- tests/test_backed_sparse.py | 18 +++++++++--------- tests/test_concatenate.py | 15 +++++++++++---- tests/test_concatenate_disk.py | 2 +- tests/test_io_conversion.py | 5 +++-- tests/test_io_dispatched.py | 5 ++--- tests/test_io_elementwise.py | 10 ++++++++-- tests/test_readwrite.py | 5 ++--- 19 files changed, 90 insertions(+), 83 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 88d5dde0d..fd6577162 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -9,10 +9,9 @@ import numpy as np import pandas as pd -from scipy.sparse import spmatrix from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning -from ..compat import AwkArray +from ..compat import AwkArray, CSArray, CSMatrix from ..utils import ( axis_len, convert_to_dict, @@ -36,7 +35,7 @@ OneDIdx = Sequence[int] | Sequence[bool] | slice TwoDIdx = tuple[OneDIdx, OneDIdx] # TODO: pd.DataFrame only allowed in AxisArrays? -Value = pd.DataFrame | spmatrix | np.ndarray +Value = pd.DataFrame | CSArray | CSMatrix | np.ndarray P = TypeVar("P", bound="AlignedMappingBase") """Parent mapping an AlignedView is based on.""" diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 48ee0086e..3255b6a0e 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -195,13 +195,13 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): def __init__( self, - X: np.ndarray | sparse.spmatrix | pd.DataFrame | None = None, + X: ArrayDataStructureType | pd.DataFrame | None = None, obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, uns: Mapping[str, Any] | None = None, obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, - layers: Mapping[str, np.ndarray | sparse.spmatrix] | None = None, + layers: Mapping[str, ArrayDataStructureType] | None = None, raw: Mapping[str, Any] | None = None, dtype: np.dtype | type | str | None = None, shape: tuple[int, int] | None = None, @@ -557,7 +557,7 @@ def X(self) -> ArrayDataStructureType | None: # return X @X.setter - def X(self, value: np.ndarray | sparse.spmatrix | CSArray | None): + def X(self, value: ArrayDataStructureType | None): if value is None: if self.isbacked: msg = "Cannot currently remove data matrix from backed object." @@ -1159,7 +1159,7 @@ def _inplace_subset_obs(self, index: Index1D): self._init_as_actual(adata_subset) # TODO: Update, possibly remove - def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix): + def __setitem__(self, index: Index, val: float | ArrayDataStructureType): if self.is_view: msg = "Object is view and cannot be accessed with `[]`." raise ValueError(msg) diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 2b186a29d..9b37ced52 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -10,7 +10,7 @@ import pandas as pd from scipy.sparse import issparse, spmatrix -from ..compat import AwkArray, CSArray, DaskArray, SpArray +from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, SpArray if TYPE_CHECKING: from ..compat import Index, Index1D @@ -69,13 +69,13 @@ def name_idx(i): elif isinstance(indexer, str): return index.get_loc(indexer) # int elif isinstance( - indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | CSArray + indexer, Sequence | np.ndarray | pd.Index | CSMatrix | np.matrix | CSArray ): if hasattr(indexer, "shape") and ( (indexer.shape == (index.shape[0], 1)) or (indexer.shape == (1, index.shape[0])) ): - if isinstance(indexer, spmatrix | CSArray): + if isinstance(indexer, CSMatrix | CSArray): indexer = indexer.toarray() indexer = np.ravel(indexer) if not isinstance(indexer, np.ndarray | pd.Index): @@ -182,7 +182,7 @@ def _subset_dask(a: DaskArray, subset_idx: Index): @_subset.register(spmatrix) @_subset.register(SpArray) -def _subset_sparse(a: spmatrix | CSArray, subset_idx: Index): +def _subset_sparse(a: CSMatrix | CSArray, subset_idx: Index): # Correcting for indexing behaviour of sparse.spmatrix if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx): first_idx = subset_idx[0] diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 218283304..2c61bef4e 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -25,6 +25,7 @@ CAN_USE_SPARSE_ARRAY, AwkArray, CSArray, + CSMatrix, CupyArray, CupyCSRMatrix, CupySparseMatrix, @@ -175,7 +176,7 @@ def equal_sparse(a, b) -> bool: xp = array_api_compat.array_namespace(a.data) - if isinstance(b, CupySparseMatrix | sparse.spmatrix | CSArray): + if isinstance(b, CupySparseMatrix | CSMatrix | CSArray): if isinstance(a, CupySparseMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 @@ -206,8 +207,8 @@ def equal_awkward(a, b) -> bool: return ak.almost_equal(a, b) -def as_sparse(x, use_sparse_array=False): - if not isinstance(x, sparse.spmatrix | CSArray): +def as_sparse(x, use_sparse_array: bool = False) -> CSMatrix | CSArray: + if not isinstance(x, CSMatrix | CSArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) return sparse.csr_matrix(x) @@ -538,7 +539,7 @@ def apply(self, el, *, axis, fill_value=None): return el if isinstance(el, pd.DataFrame): return self._apply_to_df(el, axis=axis, fill_value=fill_value) - elif isinstance(el, sparse.spmatrix | CSArray | CupySparseMatrix): + elif isinstance(el, CSMatrix | CSArray | CupySparseMatrix): return self._apply_to_sparse(el, axis=axis, fill_value=fill_value) elif isinstance(el, AwkArray): return self._apply_to_awkward(el, axis=axis, fill_value=fill_value) @@ -616,7 +617,7 @@ def _apply_to_array(self, el, *, axis, fill_value=None): ) def _apply_to_sparse( - self, el: sparse.spmatrix | CSArray, *, axis, fill_value=None + self, el: CSMatrix | CSArray, *, axis, fill_value=None ) -> spmatrix: if isinstance(el, CupySparseMatrix): from cupyx.scipy import sparse @@ -731,11 +732,8 @@ def default_fill_value(els): This is largely due to backwards compat, and might not be the ideal solution. """ if any( - isinstance(el, sparse.spmatrix | CSArray) - or ( - isinstance(el, DaskArray) - and isinstance(el._meta, sparse.spmatrix | CSArray) - ) + isinstance(el, CSMatrix | CSArray) + or (isinstance(el, DaskArray) and isinstance(el._meta, CSMatrix | CSArray)) for el in els ): return 0 @@ -831,7 +829,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ], axis=axis, ) - elif any(isinstance(a, sparse.spmatrix | CSArray) for a in arrays): + elif any(isinstance(a, CSMatrix | CSArray) for a in arrays): sparse_stack = (sparse.vstack, sparse.hstack)[axis] use_sparse_array = any(issubclass(type(a), CSArray) for a in arrays) return sparse_stack( diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index f71c8d74d..b183d0a3b 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -17,8 +17,7 @@ from collections.abc import Mapping, Sequence from typing import ClassVar - from scipy import sparse - + from ..compat import CSMatrix from .aligned_mapping import AxisArraysView from .anndata import AnnData from .sparse_dataset import BaseCompressedSparseDataset @@ -31,7 +30,7 @@ class Raw: def __init__( self, adata: AnnData, - X: np.ndarray | sparse.spmatrix | None = None, + X: np.ndarray | CSMatrix | None = None, var: pd.DataFrame | Mapping[str, Sequence] | None = None, varm: AxisArrays | Mapping[str, np.ndarray] | None = None, ): @@ -67,7 +66,7 @@ def _get_X(self, layer=None): return self.X @property - def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: + def X(self) -> BaseCompressedSparseDataset | np.ndarray | CSMatrix: # TODO: Handle unsorted array of integer indices for h5py.Datasets if not self._adata.isbacked: return self._X diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index b47b6f9f3..6b1ec7a9e 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -30,7 +30,7 @@ from .. import abc from .._settings import settings -from ..compat import CSArray, H5Group, ZarrArray, ZarrGroup, _read_attr +from ..compat import CSArray, CSMatrix, H5Group, ZarrArray, ZarrGroup, _read_attr from .index import _fix_slice_bounds, _subset, unpack_index if TYPE_CHECKING: @@ -67,7 +67,7 @@ class BackedSparseMatrix(_cs_matrix): indices: GroupStorageType indptr: np.ndarray - def copy(self) -> ss.csr_matrix | ss.csc_matrix: + def copy(self) -> CSMatrix: if isinstance(self.data, h5py.Dataset): return sparse_dataset(self.data.parent).to_memory() if isinstance(self.data, ZarrArray): @@ -329,7 +329,7 @@ def get_memory_class( if format == fmt: if use_sparray_in_io and issubclass(memory_class, CSArray): return memory_class - elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(memory_class, CSMatrix): return memory_class msg = f"Format string {format} is not supported." raise ValueError(msg) @@ -342,7 +342,7 @@ def get_backed_class( if format == fmt: if use_sparray_in_io and issubclass(backed_class, CSArray): return backed_class - elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(backed_class, CSMatrix): return backed_class msg = f"Format string {format} is not supported." raise ValueError(msg) @@ -433,9 +433,7 @@ def __repr__(self) -> str: name = type(self).__name__.removeprefix("_") return f"{name}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}" - def __getitem__( - self, index: Index | tuple[()] - ) -> float | ss.csr_matrix | ss.csc_matrix | CSArray: + def __getitem__(self, index: Index | tuple[()]) -> float | CSMatrix | CSArray: indices = self._normalize_index(index) row, col = indices mtx = self._to_backed() @@ -494,7 +492,7 @@ def __setitem__(self, index: Index | tuple[()], value) -> None: mock_matrix[row, col] = value # TODO: split to other classes? - def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | CSArray) -> None: + def append(self, sparse_matrix: CSMatrix | CSArray) -> None: """Append an in-memory or on-disk sparse matrix to the current object's store. Parameters @@ -620,7 +618,7 @@ def _to_backed(self) -> BackedSparseMatrix: mtx.indptr = self._indptr return mtx - def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | CSArray: + def to_memory(self) -> CSMatrix | CSArray: format_class = get_memory_class( self.format, use_sparray_in_io=settings.use_sparse_array_on_read ) diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index ff33dc2f3..1417d3814 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -18,6 +18,7 @@ from .._core.file_backing import filename from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import ( + CSMatrix, _clean_uns, _decode_structured_array, _from_fixed_length_strings, @@ -82,14 +83,14 @@ def write_h5ad( f.attrs.setdefault("encoding-version", "0.1.0") if "X" in as_dense and isinstance( - adata.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.X, CSMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) elif not (adata.isbacked and Path(adata.filename) == Path(filepath)): # If adata.isbacked, X should already be up to date write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) if "raw/X" in as_dense and isinstance( - adata.raw.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.raw.X, CSMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense( f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs @@ -115,7 +116,7 @@ def write_h5ad( def write_sparse_as_dense( f: h5py.Group, key: str, - value: sparse.spmatrix | BaseCompressedSparseDataset, + value: CSMatrix | BaseCompressedSparseDataset, *, dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ): @@ -172,7 +173,7 @@ def read_h5ad( backed: Literal["r", "r+"] | bool | None = None, *, as_sparse: Sequence[str] = (), - as_sparse_fmt: type[sparse.spmatrix] = sparse.csr_matrix, + as_sparse_fmt: type[CSMatrix] = sparse.csr_matrix, chunk_size: int = 6000, # TODO, probably make this 2d chunks ) -> AnnData: """\ @@ -273,7 +274,7 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, + rdasp: Callable[[h5py.Dataset], CSMatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -346,7 +347,7 @@ def read_dataset(dataset: h5py.Dataset): @report_read_key_on_error def read_dense_as_sparse( - dataset: h5py.Dataset, sparse_format: sparse.spmatrix, axis_chunk: int + dataset: h5py.Dataset, sparse_format: CSMatrix, axis_chunk: int ): if sparse_format == sparse.csr_matrix: return read_dense_as_csr(dataset, axis_chunk) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index ae5a14276..5bd605c17 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -52,7 +52,10 @@ from numpy.typing import NDArray from anndata._types import ArrayStorageType, GroupStorageType - from anndata.compat import CSArray + from anndata.compat import ( + CSArray, + CSMatrix, + ) from anndata.typing import AxisStorable, InMemoryArrayOrScalarType from .registry import Reader, Writer @@ -127,7 +130,7 @@ def wrapper( @_REGISTRY.register_read(H5Array, IOSpec("", "")) def read_basic( elem: H5File | H5Group | H5Array, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | CSArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | CSMatrix | CSArray: from anndata._io import h5ad warn( @@ -149,7 +152,7 @@ def read_basic( @_REGISTRY.register_read(ZarrArray, IOSpec("", "")) def read_basic_zarr( elem: ZarrGroup | ZarrArray, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | CSArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | CSMatrix | CSArray: from anndata._io import zarr warn( @@ -590,7 +593,7 @@ def write_recarray_zarr( def write_sparse_compressed( f: GroupStorageType, key: str, - value: sparse.spmatrix | CSArray, + value: CSMatrix | CSArray, *, _writer: Writer, fmt: Literal["csr", "csc"], @@ -756,9 +759,7 @@ def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]: @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0")) -def read_sparse( - elem: GroupStorageType, *, _reader: Reader -) -> sparse.spmatrix | CSArray: +def read_sparse(elem: GroupStorageType, *, _reader: Reader) -> CSMatrix | CSArray: return sparse_dataset(elem).to_memory() diff --git a/src/anndata/abc.py b/src/anndata/abc.py index 1b3bb88a9..03f91950c 100644 --- a/src/anndata/abc.py +++ b/src/anndata/abc.py @@ -7,9 +7,8 @@ from typing import ClassVar, Literal import numpy as np - from scipy.sparse import csc_matrix, csr_matrix - from .compat import CSArray, Index + from .compat import CSArray, CSMatrix, Index __all__ = ["CSRDataset", "CSCDataset"] @@ -31,7 +30,7 @@ class _AbstractCSDataset(ABC): """Which file type is used on-disk.""" @abstractmethod - def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | CSArray: + def __getitem__(self, index: Index) -> float | CSMatrix | CSArray: """Load a slice or an element from the sparse dataset into memory. Parameters @@ -45,7 +44,7 @@ def __getitem__(self, index: Index) -> float | csr_matrix | csc_matrix | CSArray """ @abstractmethod - def to_memory(self) -> csr_matrix | csc_matrix | CSArray: + def to_memory(self) -> CSMatrix | CSArray: """Load the sparse dataset into memory. Returns diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index b257b0d7d..a49310753 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -30,6 +30,7 @@ # scipy sparse array comapt # ############################# +CSMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix CAN_USE_SPARSE_ARRAY = Version(scipy.__version__) >= Version("1.11") @@ -62,7 +63,7 @@ class Empty: | tuple[Index1D, Index1D, EllipsisType] | tuple[EllipsisType, Index1D, Index1D] | tuple[Index1D, EllipsisType, Index1D] - | scipy.sparse.spmatrix + | CSMatrix | CSArray ) H5Group = h5py.Group diff --git a/src/anndata/typing.py b/src/anndata/typing.py index f3df44fa2..c09b88abd 100644 --- a/src/anndata/typing.py +++ b/src/anndata/typing.py @@ -5,13 +5,13 @@ import numpy as np import pandas as pd from numpy import ma -from scipy import sparse from . import abc from ._core.anndata import AnnData from .compat import ( AwkArray, CSArray, + CSMatrix, CupyArray, CupySparseMatrix, DaskArray, @@ -35,8 +35,7 @@ ArrayDataStructureType: TypeAlias = ( np.ndarray | ma.MaskedArray - | sparse.csr_matrix - | sparse.csc_matrix + | CSMatrix | CSArray | AwkArray | H5Array diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 30c1d09ae..3744aab2b 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -10,7 +10,7 @@ from scipy import sparse import anndata as ad -from anndata.compat import CSArray +from anndata.compat import CSArray, CSMatrix from anndata.tests.helpers import ( GEN_ADATA_DASK_ARGS, as_dense_dask_array, @@ -200,8 +200,8 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): var_idx = subset_func2(mem_adata.var_names) if ( array_type is asarray - and isinstance(obs_idx, list | np.ndarray | sparse.spmatrix | CSArray) - and isinstance(var_idx, list | np.ndarray | sparse.spmatrix | CSArray) + and isinstance(obs_idx, list | np.ndarray | CSMatrix | CSArray) + and isinstance(var_idx, list | np.ndarray | CSMatrix | CSArray) ): pytest.xfail( "Fancy indexing does not work with multiple arrays on a h5py.Dataset" diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 4516d195c..84f403343 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -14,7 +14,7 @@ from anndata._core.anndata import AnnData from anndata._core.sparse_dataset import sparse_dataset from anndata._io.specs.registry import read_elem_as_dask -from anndata.compat import CAN_USE_SPARSE_ARRAY, CSArray, DaskArray +from anndata.compat import CAN_USE_SPARSE_ARRAY, CSArray, CSMatrix, DaskArray from anndata.experimental import read_dispatched from anndata.tests.helpers import AccessTrackingStore, assert_equal, subset_func @@ -263,8 +263,8 @@ def test_consecutive_bool( ) def test_dataset_append_memory( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], - append_method: Callable[[list[sparse.spmatrix]], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], CSMatrix], + append_method: Callable[[list[CSMatrix]], CSMatrix], diskfmt: Literal["h5ad", "zarr"], ): path = tmp_path / f"test.{diskfmt.replace('ad', '')}" @@ -319,7 +319,7 @@ def test_append_array_cache_bust(tmp_path: Path, diskfmt: Literal["h5ad", "zarr" ) def test_read_array( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], CSMatrix], diskfmt: Literal["h5ad", "zarr"], subset_func, subset_func2, @@ -339,7 +339,7 @@ def test_read_array( ad.settings.use_sparse_array_on_read = True assert issubclass(type(diskmtx[obs_idx, var_idx]), CSArray) ad.settings.use_sparse_array_on_read = False - assert issubclass(type(diskmtx[obs_idx, var_idx]), sparse.spmatrix) + assert issubclass(type(diskmtx[obs_idx, var_idx]), CSMatrix) @pytest.mark.parametrize( @@ -351,8 +351,8 @@ def test_read_array( ) def test_dataset_append_disk( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], - append_method: Callable[[list[sparse.spmatrix]], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], CSMatrix], + append_method: Callable[[list[CSMatrix]], CSMatrix], diskfmt: Literal["h5ad", "zarr"], ): path = tmp_path / f"test.{diskfmt.replace('ad', '')}" @@ -379,7 +379,7 @@ def test_dataset_append_disk( @pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix]) def test_lazy_array_cache( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], CSMatrix], ): elems = {"indptr", "indices", "data"} path = tmp_path / "test.zarr" @@ -481,7 +481,7 @@ def width_idx_kinds( ) def test_data_access( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], CSMatrix], idx_maj: Idx, idx_min: Idx, exp: Sequence[str], diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index a4b3505ee..91012f80a 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -20,7 +20,14 @@ from anndata import AnnData, Raw, concat from anndata._core import merge from anndata._core.index import _subset -from anndata.compat import AwkArray, CSArray, CupySparseMatrix, DaskArray, SpArray +from anndata.compat import ( + AwkArray, + CSArray, + CSMatrix, + CupySparseMatrix, + DaskArray, + SpArray, +) from anndata.tests import helpers from anndata.tests.helpers import ( BASE_MATRIX_PARAMS, @@ -200,7 +207,7 @@ def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_com if isinstance(orig.X, CSArray): base_type = CSArray else: - base_type = sparse.spmatrix + base_type = CSMatrix if isinstance(orig.X, CupySparseMatrix): base_type = CupySparseMatrix assert isinstance(result.X, base_type) @@ -404,7 +411,7 @@ def test_concatenate_obsm_outer(obsm_adatas, fill_val): ), ) - assert isinstance(outer.obsm["sparse"], sparse.spmatrix) + assert isinstance(outer.obsm["sparse"], CSMatrix) np.testing.assert_equal( outer.obsm["sparse"].toarray(), np.array( @@ -1496,7 +1503,7 @@ def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): if sparse.issparse(result.X): # See https://github.com/scipy/scipy/issues/20389 for why this doesn't work with csc if sparse_indexer_type == np.int64 and ( - issubclass(cpu_array_type, sparse.spmatrix) or adata.X.format == "csc" + issubclass(cpu_array_type, CSMatrix) or adata.X.format == "csc" ): pytest.xfail( "Data type int64 is not maintained for sparse matrices or csc array" diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index 87e46cf61..b6cf8f21c 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -30,7 +30,7 @@ pd.DataFrame, ), varm_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame), - layers_types=(sparse.spmatrix, np.ndarray, pd.DataFrame), + layers_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame), ) diff --git a/tests/test_io_conversion.py b/tests/test_io_conversion.py index 217a9cc16..763c89233 100644 --- a/tests/test_io_conversion.py +++ b/tests/test_io_conversion.py @@ -10,6 +10,7 @@ from scipy import sparse import anndata as ad +from anndata.compat import CSMatrix from anndata.tests.helpers import assert_equal, gen_adata @@ -99,8 +100,8 @@ def test_dense_to_sparse_memory(tmp_path, spmtx_format, to_convert): orig = gen_adata((50, 50), np.array) orig.raw = orig.copy() orig.write_h5ad(dense_path) - assert not isinstance(orig.X, sparse.spmatrix) - assert not isinstance(orig.raw.X, sparse.spmatrix) + assert not isinstance(orig.X, CSMatrix) + assert not isinstance(orig.raw.X, CSMatrix) curr = ad.read_h5ad(dense_path, as_sparse=to_convert, as_sparse_fmt=spmtx_format) diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py index b45b63e3c..3246d64d2 100644 --- a/tests/test_io_dispatched.py +++ b/tests/test_io_dispatched.py @@ -4,10 +4,9 @@ import h5py import zarr -from scipy import sparse import anndata as ad -from anndata.compat import CSArray +from anndata.compat import CSArray, CSMatrix from anndata.experimental import read_dispatched, write_dispatched from anndata.tests.helpers import assert_equal, gen_adata @@ -96,7 +95,7 @@ def set_copy(d, **kwargs): # TODO: Should the passed path be absolute? path = "/" + store.path + "/" + k if hasattr(elem, "shape") and not isinstance( - elem, sparse.spmatrix | CSArray | ad.AnnData + elem, CSMatrix | CSArray | ad.AnnData ): if re.match(r"^/((X)|(layers)).*", path): chunks = (M, N) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index be9ce62ed..31188b173 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -22,7 +22,13 @@ get_spec, ) from anndata._io.specs.registry import IORegistryError -from anndata.compat import CAN_USE_SPARSE_ARRAY, CSArray, ZarrGroup, _read_attr +from anndata.compat import ( + CAN_USE_SPARSE_ARRAY, + CSArray, + CSMatrix, + ZarrGroup, + _read_attr, +) from anndata.experimental import read_elem_as_dask from anndata.io import read_elem, write_elem from anndata.tests.helpers import ( @@ -244,7 +250,7 @@ def test_io_spec_compressed_scalars(store: G, value: np.ndarray, encoding_type: @pytest.mark.parametrize("as_dask", [False, True]) def test_io_spec_cupy(store, value, encoding_type, as_dask): if as_dask: - if isinstance(value, sparse.spmatrix): + if isinstance(value, CSMatrix): value = as_cupy_sparse_dask_array(value, format=encoding_type[:3]) else: value = as_dense_cupy_dask_array(value) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 4c4018a09..0e23c4a25 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -15,12 +15,11 @@ import pytest import zarr from numba.core.errors import NumbaDeprecationWarning -from scipy import sparse from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata.compat import CSArray, DaskArray, _read_attr +from anndata.compat import CSArray, CSMatrix, DaskArray, _read_attr from anndata.tests.helpers import as_dense_dask_array, assert_equal, gen_adata if TYPE_CHECKING: @@ -160,7 +159,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa # since we tested if assigned types and loaded types are DaskArray # this would also work if they work if isinstance(adata_src.raw.X, CSArray): - assert isinstance(adata.raw.X, sparse.spmatrix) + assert isinstance(adata.raw.X, CSMatrix) else: assert isinstance(adata_src.raw.X, type(adata.raw.X) | DaskArray) assert isinstance( From f12f044e8cebf841aea9d9e66715b6241441b86b Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 11:14:18 +0100 Subject: [PATCH 4/8] style --- src/anndata/_io/specs/methods.py | 10 +++------- tests/test_concatenate.py | 5 +---- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 5bd605c17..327e5fbbc 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -51,13 +51,9 @@ from numpy import typing as npt from numpy.typing import NDArray - from anndata._types import ArrayStorageType, GroupStorageType - from anndata.compat import ( - CSArray, - CSMatrix, - ) - from anndata.typing import AxisStorable, InMemoryArrayOrScalarType - + from ..._types import ArrayStorageType, GroupStorageType + from ...compat import CSArray, CSMatrix + from ...typing import AxisStorable, InMemoryArrayOrScalarType from .registry import Reader, Writer #################### diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index 91012f80a..622397d88 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -204,10 +204,7 @@ def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_com assert_equal(result[orig.obs_names].copy(), orig) base_type = type(orig.X) if sparse.issparse(orig.X): - if isinstance(orig.X, CSArray): - base_type = CSArray - else: - base_type = CSMatrix + base_type = CSArray if isinstance(orig.X, CSArray) else CSMatrix if isinstance(orig.X, CupySparseMatrix): base_type = CupySparseMatrix assert isinstance(result.X, base_type) From 28b032f5c63f7138f4f7418c870a9a0d312e1225 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 11:45:06 +0100 Subject: [PATCH 5/8] Undo isinstance checks --- src/anndata/_core/anndata.py | 6 +++--- src/anndata/_core/index.py | 8 ++++---- src/anndata/_core/merge.py | 26 +++++++++++++------------- src/anndata/_core/sparse_dataset.py | 16 ++++++++-------- src/anndata/_io/h5ad.py | 6 +++--- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 3255b6a0e..e5f5c3ba8 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -26,7 +26,7 @@ from .. import utils from .._settings import settings -from ..compat import CSArray, DaskArray, ZarrArray, _move_adj_mtx +from ..compat import DaskArray, SpArray, ZarrArray, _move_adj_mtx from ..logging import anndata_logger as logger from ..utils import ( axis_len, @@ -615,7 +615,7 @@ def X(self, value: ArrayDataStructureType | None): if sparse.issparse(self._adata_ref._X) and isinstance( value, np.ndarray ): - if isinstance(self._adata_ref.X, CSArray): + if isinstance(self._adata_ref.X, SpArray): memory_class = sparse.coo_array else: memory_class = sparse.coo_matrix @@ -1705,7 +1705,7 @@ def concatenate( # Backwards compat (some of this could be more efficient) # obs used to always be an outer join sparse_class = sparse.csr_matrix - if any(isinstance(a.X, CSArray) for a in all_adatas): + if any(isinstance(a.X, SpArray) for a in all_adatas): sparse_class = sparse.csr_array out.obs = concat( [AnnData(sparse_class(a.shape), obs=a.obs) for a in all_adatas], diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 9b37ced52..90b1108ac 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -10,10 +10,10 @@ import pandas as pd from scipy.sparse import issparse, spmatrix -from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, SpArray +from ..compat import AwkArray, DaskArray, SpArray if TYPE_CHECKING: - from ..compat import Index, Index1D + from ..compat import CSArray, CSMatrix, Index, Index1D def _normalize_indices( @@ -69,13 +69,13 @@ def name_idx(i): elif isinstance(indexer, str): return index.get_loc(indexer) # int elif isinstance( - indexer, Sequence | np.ndarray | pd.Index | CSMatrix | np.matrix | CSArray + indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray ): if hasattr(indexer, "shape") and ( (indexer.shape == (index.shape[0], 1)) or (indexer.shape == (1, index.shape[0])) ): - if isinstance(indexer, CSMatrix | CSArray): + if isinstance(indexer, spmatrix | SpArray): indexer = indexer.toarray() indexer = np.ravel(indexer) if not isinstance(indexer, np.ndarray | pd.Index): diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 2c61bef4e..511854a4c 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -24,8 +24,6 @@ from ..compat import ( CAN_USE_SPARSE_ARRAY, AwkArray, - CSArray, - CSMatrix, CupyArray, CupyCSRMatrix, CupySparseMatrix, @@ -43,6 +41,8 @@ from pandas.api.extensions import ExtensionDtype + from ..compat import CSArray, CSMatrix + T = TypeVar("T") ################### @@ -176,7 +176,7 @@ def equal_sparse(a, b) -> bool: xp = array_api_compat.array_namespace(a.data) - if isinstance(b, CupySparseMatrix | CSMatrix | CSArray): + if isinstance(b, CupySparseMatrix | spmatrix | SpArray): if isinstance(a, CupySparseMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 @@ -208,7 +208,7 @@ def equal_awkward(a, b) -> bool: def as_sparse(x, use_sparse_array: bool = False) -> CSMatrix | CSArray: - if not isinstance(x, CSMatrix | CSArray): + if not isinstance(x, spmatrix | SpArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) return sparse.csr_matrix(x) @@ -539,7 +539,7 @@ def apply(self, el, *, axis, fill_value=None): return el if isinstance(el, pd.DataFrame): return self._apply_to_df(el, axis=axis, fill_value=fill_value) - elif isinstance(el, CSMatrix | CSArray | CupySparseMatrix): + elif isinstance(el, spmatrix | SpArray | CupySparseMatrix): return self._apply_to_sparse(el, axis=axis, fill_value=fill_value) elif isinstance(el, AwkArray): return self._apply_to_awkward(el, axis=axis, fill_value=fill_value) @@ -640,7 +640,7 @@ def _apply_to_sparse( shape[axis] = len(self.new_idx) shape = tuple(shape) if fill_value == 0: - if isinstance(el, CSArray): + if isinstance(el, SpArray): memory_class = sparse.csr_array else: memory_class = sparse.csr_matrix @@ -654,7 +654,7 @@ def _apply_to_sparse( idxmtx_dtype = xp.promote_types(el.dtype, xp.array(fill_value).dtype) else: idxmtx_dtype = bool - if isinstance(el, CSArray): + if isinstance(el, SpArray): memory_class = sparse.coo_array else: memory_class = sparse.coo_matrix @@ -732,8 +732,8 @@ def default_fill_value(els): This is largely due to backwards compat, and might not be the ideal solution. """ if any( - isinstance(el, CSMatrix | CSArray) - or (isinstance(el, DaskArray) and isinstance(el._meta, CSMatrix | CSArray)) + isinstance(el, spmatrix | SpArray) + or (isinstance(el, DaskArray) and isinstance(el._meta, spmatrix | SpArray)) for el in els ): return 0 @@ -829,9 +829,9 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ], axis=axis, ) - elif any(isinstance(a, CSMatrix | CSArray) for a in arrays): + elif any(isinstance(a, spmatrix | SpArray) for a in arrays): sparse_stack = (sparse.vstack, sparse.hstack)[axis] - use_sparse_array = any(issubclass(type(a), CSArray) for a in arrays) + use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays) return sparse_stack( [ f( @@ -940,7 +940,7 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): def missing_element( n: int, - els: list[CSArray | sparse.csr_matrix | sparse.csc_matrix | np.ndarray | DaskArray], + els: list[CSArray | CSMatrix | np.ndarray | DaskArray], axis: Literal[0, 1] = 0, fill_value: Any | None = None, off_axis_size: int = 0, @@ -1005,7 +1005,7 @@ def concat_pairwise_mapping( mappings: Collection[Mapping], shapes: Collection[int], join_keys=intersect_keys ): result = {} - if any(any(isinstance(v, CSArray) for v in m.values()) for m in mappings): + if any(any(isinstance(v, SpArray) for v in m.values()) for m in mappings): sparse_class = sparse.csr_array else: sparse_class = sparse.csr_matrix diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index 6b1ec7a9e..6d41157a0 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -30,7 +30,7 @@ from .. import abc from .._settings import settings -from ..compat import CSArray, CSMatrix, H5Group, ZarrArray, ZarrGroup, _read_attr +from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr from .index import _fix_slice_bounds, _subset, unpack_index if TYPE_CHECKING: @@ -40,7 +40,7 @@ from scipy.sparse._compressed import _cs_matrix from .._types import GroupStorageType - from ..compat import H5Array + from ..compat import CSArray, CSMatrix, H5Array from .index import Index, Index1D else: from scipy.sparse import spmatrix as _cs_matrix @@ -327,9 +327,9 @@ def get_memory_class( ) -> type[_cs_matrix]: for fmt, _, memory_class in FORMATS: if format == fmt: - if use_sparray_in_io and issubclass(memory_class, CSArray): + if use_sparray_in_io and issubclass(memory_class, SpArray): return memory_class - elif not use_sparray_in_io and issubclass(memory_class, CSMatrix): + elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): return memory_class msg = f"Format string {format} is not supported." raise ValueError(msg) @@ -340,9 +340,9 @@ def get_backed_class( ) -> type[BackedSparseMatrix]: for fmt, backed_class, _ in FORMATS: if format == fmt: - if use_sparray_in_io and issubclass(backed_class, CSArray): + if use_sparray_in_io and issubclass(backed_class, SpArray): return backed_class - elif not use_sparray_in_io and issubclass(backed_class, CSMatrix): + elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): return backed_class msg = f"Format string {format} is not supported." raise ValueError(msg) @@ -464,8 +464,8 @@ def __getitem__(self, index: Index | tuple[()]) -> float | CSMatrix | CSArray: mtx_fmt = get_memory_class( self.format, use_sparray_in_io=settings.use_sparse_array_on_read ) - must_convert_to_array = issubclass(mtx_fmt, CSArray) and not isinstance( - sub, CSArray + must_convert_to_array = issubclass(mtx_fmt, SpArray) and not isinstance( + sub, SpArray ) if isinstance(sub, BackedSparseMatrix) or must_convert_to_array: return mtx_fmt(sub) diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index 1417d3814..edee59775 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -18,7 +18,6 @@ from .._core.file_backing import filename from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import ( - CSMatrix, _clean_uns, _decode_structured_array, _from_fixed_length_strings, @@ -39,6 +38,7 @@ from typing import Any, Literal from .._core.file_backing import AnnDataFileManager + from ..compat import CSMatrix T = TypeVar("T") @@ -83,14 +83,14 @@ def write_h5ad( f.attrs.setdefault("encoding-version", "0.1.0") if "X" in as_dense and isinstance( - adata.X, CSMatrix | BaseCompressedSparseDataset + adata.X, sparse.spmatrix | BaseCompressedSparseDataset ): write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) elif not (adata.isbacked and Path(adata.filename) == Path(filepath)): # If adata.isbacked, X should already be up to date write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) if "raw/X" in as_dense and isinstance( - adata.raw.X, CSMatrix | BaseCompressedSparseDataset + adata.raw.X, sparse.spmatrix | BaseCompressedSparseDataset ): write_sparse_as_dense( f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs From fab3f99db223d204cc9eb53d6f5bd7f278cb5fbd Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 11:49:53 +0100 Subject: [PATCH 6/8] pretty make_dask_chunk --- src/anndata/_io/specs/lazy_methods.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index b04aa6769..98cd2dcc8 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -20,7 +20,7 @@ from collections.abc import Generator, Mapping, Sequence from typing import Literal, ParamSpec, TypeVar - from ...compat import CSArray, DaskArray, H5File + from ...compat import CSArray, CSMatrix, DaskArray, H5File from .registry import DaskReader BlockInfo = Mapping[ @@ -72,7 +72,7 @@ def make_dask_chunk( path_or_sparse_dataset: Path | D, elem_name: str, block_info: BlockInfo | None = None, -) -> sparse.csr_matrix | sparse.csc_matrix | CSArray: +) -> CSMatrix | CSArray: if block_info is None: msg = "Block info is required" raise ValueError(msg) From 77b7a489a59b829de5945c811cf8ed0221772043 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Mar 2025 11:52:01 +0100 Subject: [PATCH 7/8] kw --- src/anndata/_core/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 511854a4c..c3daa9bdb 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -207,7 +207,7 @@ def equal_awkward(a, b) -> bool: return ak.almost_equal(a, b) -def as_sparse(x, use_sparse_array: bool = False) -> CSMatrix | CSArray: +def as_sparse(x, *, use_sparse_array: bool = False) -> CSMatrix | CSArray: if not isinstance(x, spmatrix | SpArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) From 5863c6dc6d757f4794ec9338a583f05a959aa41e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 6 Mar 2025 16:00:57 +0100 Subject: [PATCH 8/8] fix test --- tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_base.py b/tests/test_base.py index a069cc138..8bb9d275c 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -38,7 +38,7 @@ def test_creation(): AnnData(ma.array([[1, 2], [3, 4]]), uns=dict(mask=[0, 1, 1, 0])) AnnData(sp.eye(2, format="csr")) if CAN_USE_SPARSE_ARRAY: - AnnData(sp.eye_array(2)) + AnnData(sp.eye_array(2, format="csr")) X = np.array([[1, 2, 3], [4, 5, 6]]) adata = AnnData( X=X,