diff --git a/.github/workflows/test-cpu.yml b/.github/workflows/test-cpu.yml index 0233f5876..c51935a4d 100644 --- a/.github/workflows/test-cpu.yml +++ b/.github/workflows/test-cpu.yml @@ -8,7 +8,7 @@ on: pull_request: env: - PYTEST_ADDOPTS: "-v --color=yes" + PYTEST_ADDOPTS: "-v --color=yes -n auto --instafail" FORCE_COLOR: "1" defaults: @@ -19,7 +19,7 @@ defaults: # https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true + cancel-in-progress: false # TODO: set to `true` again jobs: pytest: @@ -34,9 +34,9 @@ jobs: - python-version: '3.13' dependencies-version: pre-release test-type: strict-warning - - python-version: '3.11' - dependencies-version: minimum - test-type: coverage + #- python-version: '3.11' + # dependencies-version: minimum + # test-type: coverage steps: - uses: actions/checkout@v4 with: @@ -74,15 +74,15 @@ jobs: - name: Run Pytest if: matrix.test-type == 'standard' - run: pytest -n auto + run: pytest - name: Run Pytest (coverage) if: matrix.test-type == 'coverage' - run: coverage run -m pytest -n auto --cov --cov-report=xml + run: coverage run -m pytest --cov --cov-report=xml - name: Run Pytest (treat warnings as errors) if: matrix.test-type == 'strict-warning' - run: pytest --strict-warnings -n auto + run: pytest --strict-warnings - uses: codecov/codecov-action@v4 if: matrix.test-type == 'coverage' diff --git a/ci/constraints.txt b/ci/constraints.txt index ea0efd596..c352fe7b4 100644 --- a/ci/constraints.txt +++ b/ci/constraints.txt @@ -1 +1,2 @@ numba>=0.56 +fast-array-utils @ git+https://github.com/scverse/fast-array-utils.git diff --git a/hatch.toml b/hatch.toml index 12d567fc3..df1f41069 100644 --- a/hatch.toml +++ b/hatch.toml @@ -16,7 +16,7 @@ scripts.clean = "git restore --source=HEAD --staged --worktree -- docs/release-n [envs.hatch-test] default-args 
= [ ] features = [ "dev", "test" ] -extra-dependencies = [ "ipykernel" ] +extra-dependencies = [ "ipykernel", "fast-array-utils @ git+https://github.com/scverse/fast-array-utils.git" ] env-vars.UV_CONSTRAINT = "ci/constraints.txt" overrides.matrix.deps.env-vars = [ { if = [ "pre" ], key = "UV_PRERELEASE", value = "allow" }, diff --git a/pyproject.toml b/pyproject.toml index db0dbfdc4..20ca0209b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "packaging>=24.2", # array-api-compat 1.5 has https://github.com/scverse/anndata/issues/1410 "array_api_compat>1.4,!=1.5", + "fast-array-utils", "legacy-api-wrap", "zarr >=2.15.0, !=3.0.0, !=3.0.1, !=3.0.2, !=3.0.3", ] @@ -85,25 +86,27 @@ doc = [ dev-doc = [ "towncrier>=24.8.0" ] # release notes tool test-full = [ "anndata[test,lazy]" ] test = [ - "loompy>=3.0.5", "pytest>=8.2,<8.3.4", "pytest-cov", "pytest-randomly", "pytest-memray", "pytest-mock", + "pytest-instafail", + "fast-array-utils[testing]", "pytest-xdist[psutil]", "filelock", "matplotlib", "scikit-learn", + "loompy>=3.0.5", "openpyxl", "joblib", "boltons", "scanpy>=1.10", - "httpx", # For data downloading + "httpx", # For data downloading "dask[distributed]", "awkward>=2.3", "pyarrow", - "anndata[dask]", + "anndata[dask,lazy]", ] gpu = [ "cupy" ] cu12 = [ "cupy-cuda12x" ] diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 3bf95bd6e..8495e734d 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -32,7 +32,7 @@ DaskArray, _map_cat_to_str, ) -from ..utils import asarray, axis_len, warn_once +from ..utils import axis_len, warn_once from .anndata import AnnData from .index import _subset, make_slice @@ -115,8 +115,8 @@ def not_missing(v) -> bool: # TODO: Hopefully this will stop being an issue in the future and this code can be removed. 
@singledispatch def equal(a, b) -> bool: - a = asarray(a) - b = asarray(b) + a = np.asarray(a) + b = np.asarray(b) if a.ndim == b.ndim == 0: return bool(a == b) return np.array_equal(a, b) @@ -150,7 +150,7 @@ def equal_dask_array(a, b) -> bool: def equal_array(a, b) -> bool: # Reshaping allows us to compare inputs with >2 dimensions # We cast to pandas since it will still work with non-numeric types - b = asarray(b) + b = np.asarray(b) if a.shape != b.shape: return False diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 0c1c5132f..ee71acdb7 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -12,7 +12,7 @@ import h5py import numpy as np import pandas as pd -import scipy +import scipy.sparse from packaging.version import Version from zarr import Array as ZarrArray # noqa: F401 from zarr import Group as ZarrGroup diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index b1e265324..839a58fc0 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -2,12 +2,10 @@ import itertools import random -import re import warnings from collections import Counter, defaultdict from collections.abc import Mapping -from contextlib import contextmanager -from functools import partial, singledispatch, wraps +from functools import singledispatch, wraps from string import ascii_letters from typing import TYPE_CHECKING @@ -15,6 +13,7 @@ import numpy as np import pandas as pd import pytest +from fast_array_utils.conv import to_dense from pandas.api.types import is_numeric_dtype from scipy import sparse @@ -28,14 +27,11 @@ CSArray, CSMatrix, CupyArray, - CupyCSCMatrix, - CupyCSRMatrix, CupySparseMatrix, DaskArray, ZarrArray, is_zarr_v2, ) -from anndata.utils import asarray if TYPE_CHECKING: from collections.abc import Callable, Collection, Iterable @@ -584,7 +580,7 @@ def assert_equal_cupy( def assert_equal_ndarray( a: np.ndarray, b: object, *, exact: bool = False, elem_name: str | 
None = None ): - b = asarray(b) + b = to_dense(b) if not exact and is_numeric_dtype(a) and is_numeric_dtype(b): assert a.shape == b.shape, format_msg(elem_name) np.testing.assert_allclose(a, b, equal_nan=True, err_msg=format_msg(elem_name)) @@ -611,7 +607,7 @@ def assert_equal_ndarray( def assert_equal_arrayview( a: ArrayView, b: object, *, exact: bool = False, elem_name: str | None = None ): - assert_equal(asarray(a), asarray(b), exact=exact, elem_name=elem_name) + assert_equal(to_dense(a), to_dense(b), exact=exact, elem_name=elem_name) @assert_equal.register(BaseCompressedSparseDataset) @@ -623,7 +619,7 @@ def assert_equal_sparse( exact: bool = False, elem_name: str | None = None, ): - a = asarray(a) + a = to_dense(a, to_memory=True) assert_equal(b, a, exact=exact, elem_name=elem_name) @@ -647,7 +643,7 @@ def assert_equal_cupy_sparse( def assert_equal_h5py_dataset( a: ArrayStorageType, b: object, *, exact: bool = False, elem_name: str | None = None ): - a = asarray(a) + a = to_dense(a) assert_equal(b, a, exact=exact, elem_name=elem_name) @@ -825,222 +821,6 @@ def fmt_name(x): ) -def _half_chunk_size(a: tuple[int, ...]) -> tuple[int, ...]: - def half_rounded_up(x): - div, mod = divmod(x, 2) - return div + (mod > 0) - - return tuple(half_rounded_up(x) for x in a) - - -@singledispatch -def as_dense_dask_array(a): - import dask.array as da - - a = asarray(a) - return da.asarray(a, chunks=_half_chunk_size(a.shape)) - - -@as_dense_dask_array.register(CSMatrix) -def _(a): - return as_dense_dask_array(a.toarray()) - - -@as_dense_dask_array.register(DaskArray) -def _(a): - return a.map_blocks(asarray, dtype=a.dtype, meta=np.ndarray) - - -@singledispatch -def as_sparse_dask_array(a) -> DaskArray: - import dask.array as da - - return da.from_array(sparse.csr_matrix(a), chunks=_half_chunk_size(a.shape)) - - -@as_sparse_dask_array.register(CSMatrix) -def _(a): - import dask.array as da - - return da.from_array(a, _half_chunk_size(a.shape)) - - 
-@as_sparse_dask_array.register(CSArray) -def _(a): - import dask.array as da - - return da.from_array(sparse.csr_matrix(a), _half_chunk_size(a.shape)) - - -@as_sparse_dask_array.register(DaskArray) -def _(a): - return a.map_blocks(sparse.csr_matrix) - - -@singledispatch -def as_dense_cupy_dask_array(a): - import cupy as cp - - return as_dense_dask_array(a).map_blocks( - cp.array, meta=cp.array((1.0), dtype=a.dtype), dtype=a.dtype - ) - - -@as_dense_cupy_dask_array.register(CupyArray) -def _(a): - import cupy as cp - import dask.array as da - - return da.from_array( - a, - chunks=_half_chunk_size(a.shape), - meta=cp.array((1.0), dtype=a.dtype), - ) - - -@as_dense_cupy_dask_array.register(DaskArray) -def _(a): - import cupy as cp - - if isinstance(a._meta, cp.ndarray): - return a.copy() - return a.map_blocks( - partial(as_cupy, typ=CupyArray), - dtype=a.dtype, - meta=cp.array((1.0), dtype=a.dtype), - ) - - -try: - import cupyx.scipy.sparse as cpsparse - - format_to_memory_class = {"csr": cpsparse.csr_matrix, "csc": cpsparse.csc_matrix} -except ImportError: - format_to_memory_class = {} - - -# TODO: If there are chunks which divide along columns, then a coo_matrix is returned by compute -# We should try and fix this upstream in dask/ cupy -@singledispatch -def as_cupy_sparse_dask_array(a, format="csr"): - memory_class = format_to_memory_class[format] - cpu_da = as_sparse_dask_array(a) - return cpu_da.rechunk((cpu_da.chunks[0], -1)).map_blocks( - memory_class, dtype=a.dtype, meta=memory_class(cpu_da._meta) - ) - - -@as_cupy_sparse_dask_array.register(CupyArray) -@as_cupy_sparse_dask_array.register(CupySparseMatrix) -def _(a, format="csr"): - import dask.array as da - - memory_class = format_to_memory_class[format] - return da.from_array(memory_class(a), chunks=(_half_chunk_size(a.shape)[0], -1)) - - -@as_cupy_sparse_dask_array.register(DaskArray) -def _(a, format="csr"): - memory_class = format_to_memory_class[format] - if isinstance(a._meta, memory_class): - return 
a.copy() - return a.rechunk((a.chunks[0], -1)).map_blocks( - partial(as_cupy, typ=memory_class), dtype=a.dtype - ) - - -@contextmanager -def pytest_8_raises(exc_cls, *, match: str | re.Pattern = None): - """Error handling using pytest 8's support for __notes__. - - See: https://github.com/pytest-dev/pytest/pull/11227 - - Remove once pytest 8 is out! - """ - - with pytest.raises(exc_cls) as exc_info: - yield exc_info - - check_error_or_notes_match(exc_info, match) - - -def check_error_or_notes_match(e: pytest.ExceptionInfo, pattern: str | re.Pattern): - """ - Checks whether the printed error message or the notes contains the given pattern. - - DOES NOT WORK IN IPYTHON - because of the way IPython handles exceptions - """ - import traceback - - message = "".join(traceback.format_exception_only(e.type, e.value)) - assert re.search(pattern, message), ( - f"Could not find pattern: '{pattern}' in error:\n\n{message}\n" - ) - - -def resolve_cupy_type(val): - if not isinstance(val, type): - input_typ = type(val) - else: - input_typ = val - - if issubclass(input_typ, np.ndarray): - typ = CupyArray - elif issubclass(input_typ, sparse.csr_matrix): - typ = CupyCSRMatrix - elif issubclass(input_typ, sparse.csc_matrix): - typ = CupyCSCMatrix - else: - msg = f"No default target type for input type {input_typ}" - raise NotImplementedError(msg) - return typ - - -@singledispatch -def as_cupy(val, typ=None): - """ - Rough conversion function - - Will try to infer target type from input type if not specified. 
- """ - if typ is None: - typ = resolve_cupy_type(val) - - if issubclass(typ, CupyArray): - import cupy as cp - - if isinstance(val, CSMatrix): - val = val.toarray() - return cp.array(val) - elif issubclass(typ, CupyCSRMatrix): - import cupy as cp - import cupyx.scipy.sparse as cpsparse - - if isinstance(val, np.ndarray): - return cpsparse.csr_matrix(cp.array(val)) - else: - return cpsparse.csr_matrix(val) - elif issubclass(typ, CupyCSCMatrix): - import cupy as cp - import cupyx.scipy.sparse as cpsparse - - if isinstance(val, np.ndarray): - return cpsparse.csc_matrix(cp.array(val)) - else: - return cpsparse.csc_matrix(val) - else: - msg = f"Conversion from {type(val)} to {typ} not implemented" - raise NotImplementedError(msg) - - -# TODO: test -@as_cupy.register(DaskArray) -def as_cupy_dask(a, typ=None): - if typ is None: - typ = resolve_cupy_type(a._meta) - return a.map_blocks(partial(as_cupy, typ=typ), dtype=a.dtype) - - @singledispatch def shares_memory(x, y) -> bool: return np.shares_memory(x, y) @@ -1055,46 +835,6 @@ def shares_memory_sparse(x, y): ) -BASE_MATRIX_PARAMS = [ - pytest.param(asarray, id="np_array"), - pytest.param(sparse.csr_matrix, id="scipy_csr_matrix"), - pytest.param(sparse.csc_matrix, id="scipy_csc_matrix"), - pytest.param(sparse.csr_array, id="scipy_csr_array"), - pytest.param(sparse.csc_array, id="scipy_csc_array"), -] - -DASK_MATRIX_PARAMS = [ - pytest.param(as_dense_dask_array, id="dense_dask_array"), - pytest.param(as_sparse_dask_array, id="sparse_dask_array"), -] - -CUPY_MATRIX_PARAMS = [ - pytest.param( - partial(as_cupy, typ=CupyArray), id="cupy_array", marks=pytest.mark.gpu - ), - pytest.param( - partial(as_cupy, typ=CupyCSRMatrix), - id="cupy_csr", - marks=pytest.mark.gpu, - ), - pytest.param( - partial(as_cupy, typ=CupyCSCMatrix), - id="cupy_csc", - marks=pytest.mark.gpu, - ), -] - -DASK_CUPY_MATRIX_PARAMS = [ - pytest.param( - as_dense_cupy_dask_array, - id="cupy_dense_dask_array", - marks=pytest.mark.gpu, - ), - pytest.param( - 
as_cupy_sparse_dask_array, id="cupy_csr_dask_array", marks=pytest.mark.gpu - ), -] - if is_zarr_v2(): from zarr.storage import DirectoryStore as LocalStore else: diff --git a/src/anndata/utils.py b/src/anndata/utils.py index f595f8ca7..f97d9a2fa 100644 --- a/src/anndata/utils.py +++ b/src/anndata/utils.py @@ -5,21 +5,20 @@ from functools import singledispatch, wraps from typing import TYPE_CHECKING -import h5py import numpy as np import pandas as pd -from scipy import sparse import anndata -from ._core.sparse_dataset import BaseCompressedSparseDataset -from .compat import CSArray, CupyArray, CupySparseMatrix, DaskArray from .logging import get_logger if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence from typing import Any, Literal + from scipy import sparse + + logger = get_logger(__name__) @@ -42,43 +41,6 @@ def import_name(name: str) -> Any: return obj -@singledispatch -def asarray(x): - """Convert x to a numpy array""" - return np.asarray(x) - - -@asarray.register(CSArray) -@asarray.register(sparse.spmatrix) -def asarray_sparse(x): - return x.toarray() - - -@asarray.register(BaseCompressedSparseDataset) -def asarray_sparse_dataset(x): - return asarray(x.to_memory()) - - -@asarray.register(h5py.Dataset) -def asarray_h5py_dataset(x): - return x[...] - - -@asarray.register(CupyArray) -def asarray_cupy(x): - return x.get() - - -@asarray.register(CupySparseMatrix) -def asarray_cupy_sparse(x): - return x.toarray().get() - - -@asarray.register(DaskArray) -def asarray_dask(x): - return asarray(x.compute()) - - @singledispatch def convert_to_dict(obj) -> dict: return dict(obj) @@ -208,10 +170,6 @@ def axis_len_awkward(array, axis: Literal[0, 1]) -> int | None: # Use `None` as null token. 
return None if context["out"] == -1 else context["out"] - @asarray.register(ak.Array) - def asarray_awkward(x): - return x - except ImportError: pass diff --git a/src/testing/anndata/_pytest.py b/src/testing/anndata/_pytest.py index 5dc038150..babf26a23 100644 --- a/src/testing/anndata/_pytest.py +++ b/src/testing/anndata/_pytest.py @@ -16,6 +16,8 @@ import pytest +from testing.fast_array_utils import ArrayType, Flags + if TYPE_CHECKING: from collections.abc import Generator, Iterable from pathlib import Path @@ -66,6 +68,13 @@ def _doctest_env( def pytest_itemcollected(item: pytest.Item) -> None: """Define behavior of pytest.mark.gpu.""" + if ( + isinstance(item, pytest.Function) + and hasattr(item, "callspec") + and isinstance(at := item.callspec.params.get("array_type"), ArrayType) + and at.flags & Flags.Gpu + ): + item.add_marker(pytest.mark.gpu) is_gpu = len([mark for mark in item.iter_markers(name="gpu")]) > 0 if is_gpu: item.add_marker( diff --git a/tests/lazy/conftest.py b/tests/lazy/conftest.py index 4a153c25f..20484b0ff 100644 --- a/tests/lazy/conftest.py +++ b/tests/lazy/conftest.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd import pytest -from scipy import sparse import anndata as ad from anndata import AnnData @@ -15,7 +14,6 @@ from anndata.tests.helpers import ( DEFAULT_COL_TYPES, AccessTrackingStore, - as_dense_dask_array, gen_adata, gen_typed_df, ) @@ -25,16 +23,10 @@ from pathlib import Path from typing import Literal -ANNDATA_ELEMS = typing.get_args(AnnDataElem) + from testing.fast_array_utils import ArrayType -@pytest.fixture( - params=[sparse.csr_matrix, sparse.csc_matrix, np.array, as_dense_dask_array], - ids=["scipy-csr", "scipy-csc", "np-array", "dask_array"], - scope="session", -) -def mtx_format(request): - return request.param +ANNDATA_ELEMS = typing.get_args(AnnDataElem) @pytest.fixture( @@ -74,11 +66,11 @@ def simple_subset_func(request): return request.param -@pytest.fixture(scope="session") +@pytest.fixture def 
adata_remote_orig_with_path( - tmp_path_factory, + tmp_path_factory: pytest.TempPathFactory, diskfmt: str, - mtx_format, + array_type: ArrayType, worker_id: str = "serial", ) -> tuple[Path, AnnData]: """Create remote fixtures, one without a range index and the other with""" @@ -89,7 +81,7 @@ def adata_remote_orig_with_path( orig_path = tmp_path_factory.mktemp(file_name) orig = gen_adata( (100, 110), - mtx_format, + array_type, obs_dtypes=(*DEFAULT_COL_TYPES, pd.StringDtype), var_dtypes=(*DEFAULT_COL_TYPES, pd.StringDtype), ) @@ -115,10 +107,10 @@ def adata_orig(adata_remote_orig_with_path: tuple[Path, AnnData]) -> AnnData: return orig -@pytest.fixture(scope="session") +@pytest.fixture def adata_remote_with_store_tall_skinny_path( - tmp_path_factory, - mtx_format, + tmp_path_factory: pytest.TempPathFactory, + array_type: ArrayType, worker_id: str = "serial", ) -> Path: orig_path = tmp_path_factory.mktemp(f"orig_{worker_id}.zarr") @@ -131,7 +123,7 @@ def adata_remote_with_store_tall_skinny_path( orig = AnnData( obs=obs, var=var, - X=mtx_format(np.random.binomial(100, 0.005, (M, N)).astype(np.float32)), + X=array_type(np.random.binomial(100, 0.005, (M, N)).astype(np.float32)), ) orig.raw = orig.copy() orig.write_zarr(orig_path) diff --git a/tests/lazy/test_concat.py b/tests/lazy/test_concat.py index f04db4046..e0e1d666a 100644 --- a/tests/lazy/test_concat.py +++ b/tests/lazy/test_concat.py @@ -12,11 +12,10 @@ from anndata._core.file_backing import to_memory from anndata.experimental import read_lazy from anndata.tests.helpers import assert_equal, gen_adata +from testing.fast_array_utils import SUPPORTED_TYPES, Flags from .conftest import ANNDATA_ELEMS, get_key_trackers_for_columns_on_axis -pytestmark = pytest.mark.skipif(not find_spec("xarray"), reason="xarray not installed") - if TYPE_CHECKING: from collections.abc import Callable from pathlib import Path @@ -29,6 +28,13 @@ from anndata.tests.helpers import AccessTrackingStore +SPARSE_DASK = { + at for at in 
SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} + +pytestmark = pytest.mark.skipif(not find_spec("xarray"), reason="xarray not installed") + + def unify_extension_dtypes( remote: pd.DataFrame, memory: pd.DataFrame ) -> tuple[pd.DataFrame, pd.DataFrame]: @@ -215,6 +221,7 @@ def test_concat_to_memory_var( @pytest.mark.xdist_group("dask") +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_concat_data_with_cluster_to_memory( adata_remote: AnnData, join: Join_T, local_cluster_addr: str ) -> None: @@ -224,6 +231,7 @@ def test_concat_data_with_cluster_to_memory( ad.concat([adata_remote, adata_remote], join=join).to_memory() +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) @pytest.mark.parametrize( "index", [ @@ -277,6 +285,7 @@ def test_concat_data_subsetting( ) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) @pytest.mark.parametrize( ("attr", "key"), ( diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py index 3755e3023..f8424a4b9 100644 --- a/tests/lazy/test_read.py +++ b/tests/lazy/test_read.py @@ -8,6 +8,7 @@ from anndata.compat import DaskArray from anndata.experimental import read_lazy from anndata.tests.helpers import AccessTrackingStore, assert_equal, gen_adata +from testing.fast_array_utils import SUPPORTED_TYPES, Flags from .conftest import ANNDATA_ELEMS @@ -17,10 +18,17 @@ from anndata import AnnData from anndata._types import AnnDataElem + from testing.fast_array_utils import ArrayType + + +SPARSE_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} pytestmark = pytest.mark.skipif(not find_spec("xarray"), reason="xarray not installed") +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) @pytest.mark.parametrize( ("elem_key", "sub_key"), [ @@ -47,6 +55,7 @@ def test_access_count_elem_access( remote_store_tall_skinny.assert_access_count("X", 0) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, 
*SPARSE_DASK}) def test_access_count_subset( remote_store_tall_skinny: AccessTrackingStore, adata_remote_tall_skinny: AnnData, @@ -62,6 +71,7 @@ def test_access_count_subset( remote_store_tall_skinny.assert_access_count(elem_name, 0) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_access_count_subset_column_compute( remote_store_tall_skinny: AccessTrackingStore, adata_remote_tall_skinny: AnnData, @@ -74,6 +84,7 @@ def test_access_count_subset_column_compute( remote_store_tall_skinny.assert_access_count("obs/int64", 1) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_access_count_index( remote_store_tall_skinny: AccessTrackingStore, ): @@ -85,6 +96,7 @@ def test_access_count_index( remote_store_tall_skinny.assert_access_count("obs/_index", 4) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_access_count_dtype( remote_store_tall_skinny: AccessTrackingStore, adata_remote_tall_skinny: AnnData, @@ -98,15 +110,18 @@ def test_access_count_dtype( remote_store_tall_skinny.assert_access_count("obs/cat/categories", 1) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_uns_uses_dask(adata_remote: AnnData): assert isinstance(adata_remote.uns["nested"]["nested_further"]["array"], DaskArray) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_to_memory(adata_remote: AnnData, adata_orig: AnnData): remote_to_memory = adata_remote.to_memory() assert_equal(remote_to_memory, adata_orig) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) def test_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): obs_cats = adata_orig.obs["obs_cat"].cat.categories subset_obs = adata_orig.obs["obs_cat"] == obs_cats[0] @@ -117,6 +132,7 @@ def test_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): assert_equal(adata_orig[:, subset_var], adata_remote[:, subset_var].to_memory()) +@pytest.mark.array_type(skip={Flags.Gpu | 
Flags.Disk, *SPARSE_DASK}) def test_view_of_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): cats_obs = adata_orig.obs["obs_cat"].cat.categories subset_obs = (adata_orig.obs["obs_cat"] == cats_obs[0]) | ( @@ -143,8 +159,9 @@ def test_view_of_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): ) -def test_unconsolidated(tmp_path: Path, mtx_format): - adata = gen_adata((1000, 1000), mtx_format) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_unconsolidated(tmp_path: Path, array_type: ArrayType) -> None: + adata = gen_adata((1000, 1000), array_type) orig_pth = tmp_path / "orig.zarr" adata.write_zarr(orig_pth) (orig_pth / ".zmetadata").unlink() diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index ecf2ef03e..93103fac0 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -3,22 +3,32 @@ from __future__ import annotations from pathlib import Path +from typing import TYPE_CHECKING import joblib import numpy as np import pytest +from fast_array_utils.conv import to_dense from scipy import sparse import anndata as ad from anndata.compat import CSArray, CSMatrix from anndata.tests.helpers import ( GEN_ADATA_DASK_ARGS, - as_dense_dask_array, assert_equal, gen_adata, subset_func, ) -from anndata.utils import asarray +from testing.fast_array_utils import SUPPORTED_TYPES, Flags + +if TYPE_CHECKING: + from testing.fast_array_utils import ArrayType + + +SPARSE_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} + subset_func2 = subset_func @@ -57,14 +67,6 @@ def adata(): ) -@pytest.fixture( - params=[sparse.csr_matrix, sparse.csc_matrix, np.array, as_dense_dask_array], - ids=["scipy-csr", "scipy-csc", "np-array", "dask_array"], -) -def mtx_format(request): - return request.param - - @pytest.fixture(params=[sparse.csr_matrix, sparse.csc_matrix]) def sparse_format(request): return request.param @@ -89,12 +91,13 @@ def as_dense(request): 
@pytest.mark.filterwarnings("ignore:`product` is deprecated as of NumPy 1.25.0") # TODO: Check to make sure obs, obsm, layers, ... are written and read correctly as well @pytest.mark.filterwarnings("error") -def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense): +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_read_write_X(tmp_path, array_type: ArrayType, backed_mode, as_dense): base_pth = Path(tmp_path) orig_pth = base_pth / "orig.h5ad" backed_pth = base_pth / "backed.h5ad" - orig = ad.AnnData(mtx_format(asarray(sparse.random(10, 10, format="csr")))) + orig = ad.AnnData(array_type(sparse.random(10, 10, format="csr"))) orig.write(orig_pth) backed = ad.read_h5ad(orig_pth, backed=backed_mode) @@ -102,7 +105,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense): backed.file.close() from_backed = ad.read_h5ad(backed_pth) - assert np.all(asarray(orig.X) == asarray(from_backed.X)) + assert np.all(to_dense(orig.X) == to_dense(from_backed.X)) # this is very similar to the views test @@ -185,15 +188,13 @@ def test_backed_raw(tmp_path): assert_equal(final_adata, mem_adata) -@pytest.mark.parametrize( - "array_type", - [ - pytest.param(asarray, id="dense_array"), - pytest.param(sparse.csr_matrix, id="csr_matrix"), - pytest.param(sparse.csr_array, id="csr_array"), - ], -) -def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): +@pytest.mark.array_type(skip=Flags.Gpu | Flags.Disk | Flags.Dask) +def test_backed_raw_subset( + tmp_path: Path, + array_type: ArrayType[np.ndarray | CSMatrix | CSArray], + subset_func, + subset_func2, +) -> None: backed_pth = tmp_path / "backed.h5ad" final_pth = tmp_path / "final.h5ad" mem_adata = gen_adata((10, 10), X_type=array_type) @@ -201,7 +202,7 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): obs_idx = subset_func(mem_adata.obs_names) var_idx = subset_func2(mem_adata.var_names) if ( - array_type is asarray + array_type.cls is 
np.ndarray and isinstance(obs_idx, list | np.ndarray | CSMatrix | CSArray) and isinstance(var_idx, list | np.ndarray | CSMatrix | CSArray) ): @@ -231,15 +232,10 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): assert_equal(final_adata, backed_v.to_memory()) # assert loading into memory -@pytest.mark.parametrize( - "array_type", - [ - pytest.param(asarray, id="dense_array"), - pytest.param(sparse.csr_matrix, id="csr_matrix"), - pytest.param(as_dense_dask_array, id="dask_array"), - ], -) -def test_to_memory_full(tmp_path, array_type): +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_to_memory_full( + tmp_path: Path, array_type: ArrayType[np.ndarray | CSMatrix | CSArray] +) -> None: backed_pth = tmp_path / "backed.h5ad" mem_adata = gen_adata((15, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) mem_adata.raw = gen_adata((15, 12), X_type=array_type, **GEN_ADATA_DASK_ARGS) diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index b0df62e5e..c7f7b4e1f 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -14,6 +14,7 @@ import pytest import scipy from boltons.iterutils import default_exit, remap, research +from fast_array_utils.conv import to_dense from numpy import ma from packaging.version import Version from scipy import sparse @@ -24,22 +25,40 @@ from anndata.compat import AwkArray, CSArray, CSMatrix, CupySparseMatrix, DaskArray from anndata.tests import helpers from anndata.tests.helpers import ( - BASE_MATRIX_PARAMS, - CUPY_MATRIX_PARAMS, - DASK_MATRIX_PARAMS, DEFAULT_COL_TYPES, GEN_ADATA_DASK_ARGS, - as_dense_dask_array, assert_equal, gen_adata, gen_vstr_recarray, ) -from anndata.utils import asarray +from testing.fast_array_utils import SUPPORTED_TYPES, Flags if TYPE_CHECKING: from collections.abc import Callable from typing import Any, Literal + from testing.fast_array_utils import ArrayType + + +AT_DENSE_DASK = next( + at + for at in SUPPORTED_TYPES + if at.flags & 
Flags.Dask and not at.flags & (Flags.Sparse | Flags.Gpu) +) + + +SPARSE_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} +GPU_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Gpu and at.flags & Flags.Dask +} +SPARSE_GPU = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Gpu +} +SPARSE_GPU_DASK = SPARSE_GPU | SPARSE_DASK | GPU_DASK + + mark_legacy_concatenate = pytest.mark.filterwarnings( r"ignore:.*AnnData\.concatenate is deprecated:FutureWarning" ) @@ -59,7 +78,7 @@ def _filled_array_np(a, fill_value=None): @filled_like.register(DaskArray) def _filled_array(a, fill_value=None): - return as_dense_dask_array(_filled_array_np(a, fill_value)) + return AT_DENSE_DASK(_filled_array_np(a, fill_value)) # noqa: F821 @filled_like.register(CSMatrix) @@ -94,18 +113,6 @@ def make_idx_tuple(idx, axis): return tuple(tup) -# Will call func(sparse_matrix) so these types should be sparse compatible -# See array_type if only dense arrays are expected as input. 
-@pytest.fixture(params=BASE_MATRIX_PARAMS + DASK_MATRIX_PARAMS + CUPY_MATRIX_PARAMS) -def array_type(request): - return request.param - - -@pytest.fixture(params=BASE_MATRIX_PARAMS + DASK_MATRIX_PARAMS) -def cpu_array_type(request): - return request.param - - @pytest.fixture(params=["inner", "outer"]) def join_type(request): return request.param @@ -174,6 +181,7 @@ def test_concat_interface_errors(): @mark_legacy_concatenate +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_DASK}) @pytest.mark.parametrize( ("concat_func", "backwards_compat"), [ @@ -181,7 +189,9 @@ def test_concat_interface_errors(): (lambda x, **kwargs: x[0].concatenate(x[1:], **kwargs), True), ], ) -def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_compat): +def test_concatenate_roundtrip( + join_type, array_type: ArrayType, concat_func, backwards_compat +) -> None: adata = gen_adata((100, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) remaining = adata.obs_names @@ -285,10 +295,11 @@ def test_concatenate_dense(): @mark_legacy_concatenate -def test_concatenate_layers(array_type, join_type): +@pytest.mark.array_type(skip=Flags.Disk) +def test_concatenate_layers(array_type: ArrayType, join_type) -> None: adatas = [] for _ in range(5): - a = array_type(sparse.random(100, 200, format="csr")) + a = array_type(sparse.random(100, 200, format="csr").toarray()) adatas.append(AnnData(X=a, layers={"a": a})) merged = adatas[0].concatenate(adatas[1:], join=join_type) @@ -455,10 +466,11 @@ def test_concat_annot_join(obsm_adatas, join_type): @mark_legacy_concatenate +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_DASK}) def test_concatenate_layers_misaligned(array_type, join_type): adatas = [] for _ in range(5): - a = array_type(sparse.random(100, 200, format="csr")) + a = array_type(sparse.random(100, 200, format="csr").toarray()) adata = AnnData(X=a, layers={"a": a}) adatas.append( adata[:, np.random.choice(adata.var_names, 150, replace=False)].copy() @@ -469,18 +481,19 @@ def 
test_concatenate_layers_misaligned(array_type, join_type): @mark_legacy_concatenate +@pytest.mark.array_type(skip={Flags.Disk | Flags.Gpu, *SPARSE_DASK}) def test_concatenate_layers_outer(array_type, fill_val): # Testing that issue #368 is fixed a = AnnData( X=np.ones((10, 20)), - layers={"a": array_type(sparse.random(10, 20, format="csr"))}, + layers={"a": array_type(sparse.random(10, 20, format="csr").toarray())}, ) b = AnnData(X=np.ones((10, 20))) c = a.concatenate(b, join="outer", fill_value=fill_val, batch_categories=["a", "b"]) np.testing.assert_array_equal( - asarray(c[c.obs["batch"] == "b"].layers["a"]), fill_val + to_dense(c[c.obs["batch"] == "b"].layers["a"]), fill_val ) @@ -526,7 +539,7 @@ def get_obs_els(adata): for k, cur_v in cur_els.items(): orig_v = orig_els.get(k, sparse.csr_matrix((orig.n_obs, 0))) assert_equal(cur_v[:, : orig_v.shape[1]], orig_v) - np.testing.assert_equal(asarray(cur_v[:, orig_v.shape[1] :]), fill_val) + np.testing.assert_equal(to_dense(cur_v[:, orig_v.shape[1] :]), fill_val) ptr += orig.n_obs @@ -836,7 +849,8 @@ def test_awkward_does_not_mix(join_type, other): concat([adata_a, adata_b], join=join_type) -def test_pairwise_concat(axis_name, array_type): +@pytest.mark.array_type(skip=Flags.Disk | Flags.Gpu) +def test_pairwise_concat(axis_name, array_type: ArrayType) -> None: axis, axis_name = merge._resolve_axis(axis_name) _, alt_axis_name = merge._resolve_axis(1 - axis) axis_sizes = [[100, 200, 50], [50, 50, 50]] @@ -847,7 +861,7 @@ def test_pairwise_concat(axis_name, array_type): alt_attr = f"{alt_axis_name}p" def gen_axis_array(m): - return array_type(sparse.random(m, m, format="csr", density=0.1)) + return array_type(sparse.random(m, m, format="csr", density=0.1).toarray()) adatas = { k: AnnData( @@ -894,14 +908,15 @@ def gen_axis_array(m): ) -def test_nan_merge(axis_name, join_type, array_type): +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_GPU_DASK}) +def test_nan_merge(axis_name, join_type, array_type: ArrayType) -> 
None: axis, _ = merge._resolve_axis(axis_name) alt_axis, alt_axis_name = merge._resolve_axis(1 - axis) mapping_attr = f"{alt_axis_name}m" adata_shape = (20, 10) arr = array_type( - sparse.random(adata_shape[alt_axis], 10, density=0.1, format="csr") + sparse.random(adata_shape[alt_axis], 10, density=0.1, format="csr").toarray() ) arr_nan = arr.copy() with warnings.catch_warnings(): @@ -1161,6 +1176,7 @@ def test_concatenate_uns(unss, merge_strategy, result, value_gen): assert_equal(merged, result, elem_name="uns") +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_DASK}) def test_transposed_concat(array_type, axis_name, join_type, merge_strategy): axis, axis_name = merge._resolve_axis(axis_name) alt_axis = 1 - axis @@ -1484,16 +1500,18 @@ def test_concat_null_X(): assert_equal(no_X, orig) -# https://github.com/scverse/ehrapy/issues/151#issuecomment-1016753744 +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_DASK, *SPARSE_GPU}) @pytest.mark.parametrize("sparse_indexer_type", [np.int64, np.int32]) -def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): +def test_concat_X_dtype( + array_type: ArrayType, sparse_indexer_type: type[np.int64 | np.int32] +) -> None: + """See """ adatas_orig = { - k: AnnData(cpu_array_type(np.ones((20, 10), dtype=np.int8))) - for k in list("abc") + k: AnnData(array_type(np.ones((20, 10), dtype=np.int8))) for k in list("abc") } for adata in adatas_orig.values(): - adata.raw = AnnData(cpu_array_type(np.ones((20, 30), dtype=np.float64))) - if sparse.issparse(adata.X): + adata.raw = AnnData(array_type(np.ones((20, 30), dtype=np.float64))) + if isinstance(adata.X, CSArray | CSMatrix): adata.X.indptr = adata.X.indptr.astype(sparse_indexer_type) adata.X.indices = adata.X.indices.astype(sparse_indexer_type) @@ -1501,14 +1519,14 @@ def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): assert result.X.dtype == np.int8 assert result.raw.X.dtype == np.float64 - if sparse.issparse(result.X): + if isinstance(result.X, CSArray | CSMatrix): 
# https://github.com/scipy/scipy/issues/20389 was merged in 1.15 but is still an issue with matrix if sparse_indexer_type == np.int64 and ( ( - (issubclass(cpu_array_type, CSArray) or adata.X.format == "csc") + (issubclass(array_type.cls, CSArray) or adata.X.format == "csc") and Version(scipy.__version__) < Version("1.15.0") ) - or issubclass(cpu_array_type, CSMatrix) + or issubclass(array_type.cls, CSMatrix) ): pytest.xfail( "Data type int64 is not maintained for sparse matrices or csc array" @@ -1524,7 +1542,8 @@ def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): # Tests how dask plays with other types on concatenation. -def test_concat_different_types_dask(merge_strategy, array_type): +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_GPU_DASK}) +def test_concat_different_types_dask(merge_strategy, array_type: ArrayType) -> None: import dask.array as da from scipy import sparse @@ -1533,7 +1552,9 @@ def test_concat_different_types_dask(merge_strategy, array_type): varm_array = sparse.random(5, 20, density=0.5, format="csr") ad1 = ad.AnnData(X=np.ones((5, 5)), varm={"a": varm_array}) - ad1_other = ad.AnnData(X=np.ones((5, 5)), varm={"a": array_type(varm_array)}) + ad1_other = ad.AnnData( + X=np.ones((5, 5)), varm={"a": array_type(varm_array.toarray())} + ) ad2 = ad.AnnData(X=np.zeros((5, 5)), varm={"a": da.ones(5, 20)}) result1 = ad.concat([ad1, ad2], merge=merge_strategy) @@ -1661,7 +1682,8 @@ def test_error_on_mixed_device(): concat(p) -def test_concat_on_var_outer_join(array_type): +@pytest.mark.array_type(skip={Flags.Disk, *SPARSE_DASK, *GPU_DASK}) +def test_concat_on_var_outer_join(array_type: ArrayType) -> None: # https://github.com/scverse/anndata/issues/1286 a = AnnData( obs=pd.DataFrame(index=[f"cell_{i:02d}" for i in range(10)]), diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index 20cb99f27..a4972bbf2 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -6,6 +6,7 @@ import numpy as 
np import pandas as pd import pytest +from fast_array_utils.conv import to_dense from scipy import sparse from anndata import AnnData, concat @@ -13,12 +14,14 @@ from anndata.experimental.merge import as_group, concat_on_disk from anndata.io import read_elem, write_elem from anndata.tests.helpers import assert_equal, gen_adata -from anndata.utils import asarray if TYPE_CHECKING: + from collections.abc import Callable from pathlib import Path from typing import Literal + from anndata.compat import CSArray, CSMatrix + GEN_ADATA_OOC_CONCAT_ARGS = dict( obsm_types=( @@ -98,13 +101,15 @@ def assert_eq_concat_on_disk( assert_equal(res1, res2, exact=False) -def get_array_type(array_type, axis): +def get_array_type( + array_type: Literal["array", "sparse", "sparse_array"], axis: Literal[0, 1] +) -> Callable[[np.ndarray], CSArray | CSMatrix | np.ndarray]: if array_type == "sparse": return sparse.csr_matrix if axis == 0 else sparse.csc_matrix if array_type == "sparse_array": return sparse.csr_array if axis == 0 else sparse.csc_array if array_type == "array": - return asarray + return to_dense msg = f"array_type {array_type} not implemented" raise NotImplementedError(msg) diff --git a/tests/test_dask.py b/tests/test_dask.py index 3ed2760b9..4fe1134e9 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -9,20 +9,22 @@ import numpy as np import pandas as pd import pytest -from scipy import sparse import anndata as ad from anndata._core.anndata import AnnData -from anndata.compat import CupyArray, DaskArray +from anndata.compat import DaskArray from anndata.experimental.merge import as_group -from anndata.tests.helpers import ( - GEN_ADATA_DASK_ARGS, - as_dense_cupy_dask_array, - as_dense_dask_array, - as_sparse_dask_array, - assert_equal, - gen_adata, -) +from anndata.tests.helpers import GEN_ADATA_DASK_ARGS, assert_equal, gen_adata +from testing.fast_array_utils import SUPPORTED_TYPES, Flags + +if TYPE_CHECKING: + from anndata.compat import CSArray, CSMatrix, CupyArray, 
CupyCSRMatrix + from testing.fast_array_utils import ArrayType + + +CUPY_SPARSE = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Gpu +} if TYPE_CHECKING: from pathlib import Path @@ -272,22 +274,15 @@ def test_assign_X(adata): # Test if dask arrays turn into numpy arrays after to_memory is called -@pytest.mark.parametrize( - ("array_func", "mem_type"), - [ - pytest.param(as_dense_dask_array, np.ndarray, id="dense_dask_array"), - pytest.param(as_sparse_dask_array, sparse.csr_matrix, id="sparse_dask_array"), - pytest.param( - as_dense_cupy_dask_array, - CupyArray, - id="cupy_dense_dask_array", - marks=pytest.mark.gpu, - ), +@pytest.mark.array_type(select=Flags.Dask, skip=CUPY_SPARSE) +def test_dask_to_memory_unbacked( + array_type: ArrayType[ + DaskArray, + ArrayType[np.ndarray | CSMatrix | CSArray | CupyArray | CupyCSRMatrix, None], ], -) -def test_dask_to_memory_unbacked(array_func, mem_type): - orig = gen_adata((15, 10), X_type=array_func, **GEN_ADATA_DASK_ARGS) - orig.uns = {"da": {"da": array_func(np.ones((4, 12)))}} +) -> None: + orig = gen_adata((15, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) + orig.uns = {"da": {"da": array_type(np.ones((4, 12)))}} assert isinstance(orig.X, DaskArray) assert isinstance(orig.obsm["da"], DaskArray) @@ -298,11 +293,11 @@ def test_dask_to_memory_unbacked(array_func, mem_type): curr = orig.to_memory() assert_equal(orig, curr) - assert isinstance(curr.X, mem_type) + assert isinstance(curr.X, array_type.inner.cls) assert isinstance(curr.obsm["da"], np.ndarray) assert isinstance(curr.varm["da"], np.ndarray) assert isinstance(curr.layers["da"], np.ndarray) - assert isinstance(curr.uns["da"]["da"], mem_type) + assert isinstance(curr.uns["da"]["da"], array_type.inner.cls) assert isinstance(orig.X, DaskArray) assert isinstance(orig.obsm["da"], DaskArray) assert isinstance(orig.layers["da"], DaskArray) @@ -311,11 +306,12 @@ def test_dask_to_memory_unbacked(array_func, mem_type): # Test if dask arrays 
turn into numpy arrays after to_memory is called -def test_dask_to_memory_copy_unbacked(): +@pytest.mark.array_type(select=Flags.Dask, skip=Flags.Sparse | Flags.Gpu) +def test_dask_to_memory_copy_unbacked(array_type: ArrayType[DaskArray]) -> None: import numpy as np - orig = gen_adata((15, 10), X_type=as_dense_dask_array, **GEN_ADATA_DASK_ARGS) - orig.uns = {"da": {"da": as_dense_dask_array(np.ones(12))}} + orig = gen_adata((15, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) # noqa: F821 + orig.uns = {"da": {"da": array_type(np.ones(12))}} # noqa: F821 curr = orig.to_memory(copy=True) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index bdf8050d6..b67b69efe 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -5,20 +5,12 @@ import numpy as np import pandas as pd import pytest +from fast_array_utils.conv import to_dense from scipy import sparse import anndata as ad -from anndata.compat import CupyArray, CupyCSRMatrix, DaskArray from anndata.tests.helpers import ( - BASE_MATRIX_PARAMS, - CUPY_MATRIX_PARAMS, - DASK_MATRIX_PARAMS, DEFAULT_COL_TYPES, - as_cupy, - as_cupy_sparse_dask_array, - as_dense_cupy_dask_array, - as_dense_dask_array, - asarray, assert_equal, gen_adata, gen_awkward, @@ -268,61 +260,7 @@ def test_assert_equal_dask_sparse_arrays(): from scipy import sparse x = sparse.random(10, 10, format="csr", density=0.1) - y = da.from_array(asarray(x)) + y = da.from_array(to_dense(x, to_memory=True)) assert_equal(x, y) assert_equal(y, x) - - -@pytest.mark.parametrize( - "input_type", BASE_MATRIX_PARAMS + DASK_MATRIX_PARAMS + CUPY_MATRIX_PARAMS -) -@pytest.mark.parametrize( - ( - "as_dask_type", - "mem_type", - ), - [ - pytest.param( - as_dense_cupy_dask_array, CupyArray, id="cupy_dense", marks=pytest.mark.gpu - ), - pytest.param(as_dense_dask_array, np.ndarray, id="numpy_dense"), - pytest.param( - as_cupy_sparse_dask_array, - CupyCSRMatrix, - id="cupy_csr", - marks=pytest.mark.gpu, - ), - ], -) -def test_as_dask_functions(input_type, 
as_dask_type, mem_type): - SHAPE = (1000, 100) - - rng = np.random.default_rng(42) - X_source = rng.poisson(size=SHAPE).astype(np.float32) - X_input = input_type(X_source) - X_output = as_dask_type(X_input) - X_computed = X_output.compute() - - assert isinstance(X_output, DaskArray) - assert X_output.shape == SHAPE - assert X_output.dtype == X_input.dtype - - assert isinstance(X_computed, mem_type) - - assert_equal(asarray(X_computed), X_source) - - -@pytest.mark.parametrize( - "dask_matrix_type", - DASK_MATRIX_PARAMS, -) -@pytest.mark.gpu -def test_as_cupy_dask(dask_matrix_type): - SHAPE = (100, 10) - rng = np.random.default_rng(42) - X_cpu = dask_matrix_type(rng.normal(size=SHAPE)) - X_gpu_roundtripped = as_cupy(X_cpu).map_blocks(lambda x: x.get(), meta=X_cpu._meta) - assert isinstance(X_gpu_roundtripped._meta, type(X_cpu._meta)) - assert isinstance(X_gpu_roundtripped.compute(), type(X_cpu.compute())) - assert_equal(X_gpu_roundtripped.compute(), X_cpu.compute()) diff --git a/tests/test_inplace_subset.py b/tests/test_inplace_subset.py index ce0e75c47..9f268d251 100644 --- a/tests/test_inplace_subset.py +++ b/tests/test_inplace_subset.py @@ -1,37 +1,20 @@ from __future__ import annotations -import numpy as np +from typing import TYPE_CHECKING + import pytest -from scipy import sparse - -from anndata.tests.helpers import ( - as_dense_dask_array, - assert_equal, - gen_adata, -) -from anndata.utils import asarray - - -@pytest.fixture( - params=[ - np.array, - sparse.csr_matrix, - sparse.csc_matrix, - sparse.csr_array, - sparse.csc_array, - as_dense_dask_array, - ], - ids=[ - "np_array", - "scipy_csr", - "scipy_csc", - "scipy_csr_array", - "scipy_csc_array", - "dask_array", - ], -) -def matrix_type(request): - return request.param +from fast_array_utils.conv import to_dense + +from anndata.tests.helpers import assert_equal, gen_adata +from testing.fast_array_utils import SUPPORTED_TYPES, Flags + +if TYPE_CHECKING: + from testing.fast_array_utils import ArrayType + + 
+SPARSE_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} def subset_dim(adata, *, obs=slice(None), var=slice(None)): @@ -41,16 +24,17 @@ def subset_dim(adata, *, obs=slice(None), var=slice(None)): return _subset(adata, (obs, var)) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) # TODO: Test values of .uns -def test_inplace_subset_var(matrix_type, subset_func): - orig = gen_adata((30, 30), X_type=matrix_type) +def test_inplace_subset_var(array_type: ArrayType, subset_func) -> None: + orig = gen_adata((30, 30), X_type=array_type) subset_idx = subset_func(orig.var_names) modified = orig.copy() from_view = orig[:, subset_idx].copy() modified._inplace_subset_var(subset_idx) - assert_equal(asarray(from_view.X), asarray(modified.X), exact=True) + assert_equal(to_dense(from_view.X), to_dense(modified.X), exact=True) assert_equal(from_view.obs, modified.obs, exact=True) assert_equal(from_view.var, modified.var, exact=True) for k in from_view.obsm: @@ -62,15 +46,16 @@ def test_inplace_subset_var(matrix_type, subset_func): assert_equal(from_view.layers[k], modified.layers[k], exact=True) -def test_inplace_subset_obs(matrix_type, subset_func): - orig = gen_adata((30, 30), X_type=matrix_type) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_inplace_subset_obs(array_type: ArrayType, subset_func) -> None: + orig = gen_adata((30, 30), X_type=array_type) subset_idx = subset_func(orig.obs_names) modified = orig.copy() from_view = orig[subset_idx, :].copy() modified._inplace_subset_obs(subset_idx) - assert_equal(asarray(from_view.X), asarray(modified.X), exact=True) + assert_equal(to_dense(from_view.X), to_dense(modified.X), exact=True) assert_equal(from_view.obs, modified.obs, exact=True) assert_equal(from_view.var, modified.var, exact=True) for k in from_view.obsm: diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 7f8bcfc86..c68fe0f9c 100644 --- 
a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -23,13 +23,7 @@ from anndata.compat import CSArray, CSMatrix, ZarrGroup, _read_attr, is_zarr_v2 from anndata.experimental import read_elem_lazy from anndata.io import read_elem, write_elem -from anndata.tests.helpers import ( - as_cupy, - as_cupy_sparse_dask_array, - as_dense_cupy_dask_array, - assert_equal, - gen_adata, -) +from anndata.tests.helpers import assert_equal, gen_adata if TYPE_CHECKING: from pathlib import Path @@ -258,18 +252,18 @@ def test_io_spec_compressed_scalars(store: G, value: np.ndarray, encoding_type: def test_io_spec_cupy(store, value, encoding_type, as_dask): if as_dask: if isinstance(value, CSMatrix): - value = as_cupy_sparse_dask_array(value, format=encoding_type[:3]) + value = as_cupy_sparse_dask_array(value, format=encoding_type[:3]) # noqa: F821 else: - value = as_dense_cupy_dask_array(value) + value = as_dense_cupy_dask_array(value) # noqa: F821 else: - value = as_cupy(value) + value = as_cupy(value) # noqa: F821 key = f"key_for_{encoding_type}" write_elem(store, key, value, dataset_kwargs={}) assert encoding_type == _read_attr(store[key].attrs, "encoding-type") - from_disk = as_cupy(read_elem(store[key])) + from_disk = as_cupy(read_elem(store[key])) # noqa: F821 assert_equal(value, from_disk) assert get_spec(store[key]) == _REGISTRY.get_spec(value) diff --git a/tests/test_obspvarp.py b/tests/test_obspvarp.py index 42fc47172..403ce55da 100644 --- a/tests/test_obspvarp.py +++ b/tests/test_obspvarp.py @@ -7,11 +7,11 @@ import numpy as np import pandas as pd import pytest +from fast_array_utils.conv import to_dense from scipy import sparse from anndata import AnnData from anndata.tests.helpers import gen_typed_df_t2_size -from anndata.utils import asarray M, N = (200, 100) @@ -40,10 +40,10 @@ def test_assigmnent_dict(adata: AnnData): ) adata.obsp = d_obsp for k, v in d_obsp.items(): - assert np.all(asarray(adata.obsp[k]) == asarray(v)) + assert 
np.all(to_dense(adata.obsp[k]) == to_dense(v)) adata.varp = d_varp for k, v in d_varp.items(): - assert np.all(asarray(adata.varp[k]) == asarray(v)) + assert np.all(to_dense(adata.varp[k]) == to_dense(v)) def test_setting_ndarray(adata: AnnData): diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index b4702e219..6c934d010 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -15,7 +15,7 @@ import pytest import zarr from numba.core.errors import NumbaDeprecationWarning -from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix +from scipy.sparse import csr_matrix import anndata as ad from anndata._io.specs.registry import IORegistryError @@ -29,11 +29,20 @@ _read_attr, is_zarr_v2, ) -from anndata.tests.helpers import as_dense_dask_array, assert_equal, gen_adata +from anndata.tests.helpers import assert_equal, gen_adata +from testing.fast_array_utils import SUPPORTED_TYPES, Flags if TYPE_CHECKING: - from typing import Literal + from typing import Any, Literal + from numpy.typing import NDArray + + from testing.fast_array_utils import ArrayType + + +SPARSE_DASK = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask +} HERE = Path(__file__).parent @@ -103,8 +112,8 @@ def dtype(request): # ------------------------------------------------------------------------------ -@pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2): +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_roundtrip(array_type: ArrayType, tmp_path, diskfmt, diskfmt2): pth1 = tmp_path / f"first.{diskfmt}" write1 = lambda x: getattr(x, f"write_{diskfmt}")(pth1) read1 = lambda: getattr(ad, f"read_{diskfmt}")(pth1) @@ -112,7 +121,7 @@ def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2): write2 = lambda x: getattr(x, f"write_{diskfmt2}")(pth2) read2 = lambda: getattr(ad, 
f"read_{diskfmt2}")(pth2) - adata1 = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict) + adata1 = ad.AnnData(array_type(X_list), obs=obs_dict, var=var_dict, uns=uns_dict) write1(adata1) adata2 = read1() write2(adata2) @@ -142,9 +151,11 @@ async def _do_test(): @pytest.mark.parametrize("storage", ["h5ad", "zarr"]) -@pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwargs): - X = typ(X_list) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_kitchensink( + tmp_path, storage, array_type: ArrayType, backing_h5ad, dataset_kwargs +): + X = array_type(X_list) adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) assert not isinstance(adata_src.obs["oanno1"].dtype, pd.CategoricalDtype) adata_src.raw = adata_src.copy() @@ -188,9 +199,11 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa assert_equal(adata, adata_src) -@pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_maintain_X_dtype(typ, backing_h5ad): - X = typ(X_list).astype("int8") +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_maintain_X_dtype( + array_type: ArrayType[NDArray[Any] | CSArray | CSMatrix], backing_h5ad +): + X = array_type(X_list).astype("int8") adata_src = ad.AnnData(X) adata_src.write(backing_h5ad) @@ -221,9 +234,9 @@ def test_maintain_layers(rw): assert not np.any((orig.layers["sparse"] != curr.layers["sparse"]).toarray()) -@pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_h5ad_one_dimension(typ, backing_h5ad): - X = typ(X_list) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_h5ad_one_dimension(array_type: ArrayType, backing_h5ad): + X = array_type(X_list) adata_src = ad.AnnData(X, 
obs=obs_dict, var=var_dict, uns=uns_dict) adata_one = adata_src[:, 0].copy() adata_one.write(backing_h5ad) @@ -232,9 +245,9 @@ def test_readwrite_h5ad_one_dimension(typ, backing_h5ad): assert_equal(adata, adata_one) -@pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_backed(typ, backing_h5ad): - X = typ(X_list) +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_backed(array_type: ArrayType, backing_h5ad) -> None: + X = array_type(X_list) adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) adata_src.filename = backing_h5ad # change to backed mode adata_src.write() @@ -247,15 +260,13 @@ def test_readwrite_backed(typ, backing_h5ad): assert_equal(adata, adata_src) -@pytest.mark.parametrize( - "typ", [np.array, csr_matrix, csc_matrix, csr_array, csc_array] -) -def test_readwrite_equivalent_h5ad_zarr(tmp_path, typ): +@pytest.mark.array_type(skip={Flags.Gpu | Flags.Disk, *SPARSE_DASK}) +def test_readwrite_equivalent_h5ad_zarr(tmp_path: Path, array_type: ArrayType) -> None: h5ad_pth = tmp_path / "adata.h5ad" zarr_pth = tmp_path / "adata.zarr" M, N = 100, 101 - adata = gen_adata((M, N), X_type=typ) + adata = gen_adata((M, N), X_type=array_type) adata.raw = adata.copy() adata.write_h5ad(h5ad_pth) diff --git a/tests/test_views.py b/tests/test_views.py index f67f315e7..b27eb942d 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -3,13 +3,14 @@ from contextlib import ExitStack from copy import deepcopy from operator import mul -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import joblib import numpy as np import pandas as pd import pytest from dask.base import tokenize +from fast_array_utils.conv import to_dense from packaging.version import Version from scipy import sparse @@ -22,11 +23,8 @@ SparseCSRArrayView, SparseCSRMatrixView, ) -from anndata.compat import CupyCSCMatrix, DaskArray +from anndata.compat import CSArray, 
CSMatrix, CupyCSCMatrix, DaskArray from anndata.tests.helpers import ( - BASE_MATRIX_PARAMS, - CUPY_MATRIX_PARAMS, - DASK_MATRIX_PARAMS, GEN_ADATA_DASK_ARGS, assert_equal, gen_adata, @@ -34,10 +32,23 @@ slice_subset, subset_func, ) -from anndata.utils import asarray +from testing.fast_array_utils import SUPPORTED_TYPES, Flags if TYPE_CHECKING: from types import EllipsisType + from typing import Literal + + from testing.fast_array_utils import ArrayType + + +DASK_SPARRAY = { + at + for at in SUPPORTED_TYPES + if at.flags & Flags.Sparse and at.flags & Flags.Dask and not at.flags & Flags.Matrix +} +CUPY_SPARSE = { + at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Gpu +} IGNORE_SPARSE_EFFICIENCY_WARNING = pytest.mark.filterwarnings( "ignore:Changing the sparsity structure:scipy.sparse.SparseEfficiencyWarning" @@ -77,25 +88,8 @@ def adata(): return adata -@pytest.fixture( - params=BASE_MATRIX_PARAMS + DASK_MATRIX_PARAMS + CUPY_MATRIX_PARAMS, -) -def matrix_type(request): - return request.param - - -@pytest.fixture(params=BASE_MATRIX_PARAMS + DASK_MATRIX_PARAMS) -def matrix_type_no_gpu(request): - return request.param - - -@pytest.fixture(params=BASE_MATRIX_PARAMS) -def matrix_type_base(request): - return request.param - - @pytest.fixture(params=["layers", "obsm", "varm"]) -def mapping_name(request): +def mapping_name(request: pytest.FixtureRequest) -> Literal["layers", "obsm", "varm"]: return request.param @@ -155,14 +149,17 @@ def test_view_subset_shapes(): assert {k: v.shape[0] for k, v in view.varm.items()} == {k: 5 for k in view.varm} -def test_modify_view_component(matrix_type, mapping_name, request): - adata = ad.AnnData( - np.zeros((10, 10)), - **{mapping_name: dict(m=matrix_type(asarray(sparse.random(10, 10))))}, - ) +@pytest.mark.array_type(skip={Flags.Disk, *CUPY_SPARSE}) +def test_modify_view_component( + array_type: ArrayType, + mapping_name: Literal["layers", "obsm", "varm"], + request: pytest.FixtureRequest, +) -> None: + m = 
array_type(sparse.random(10, 10, format="csr")) + adata = ad.AnnData(np.zeros((10, 10)), **{mapping_name: dict(m=m)}) # Fix if and when dask supports tokenizing GPU arrays # https://github.com/dask/dask/issues/6718 - if isinstance(matrix_type(np.zeros((1, 1))), DaskArray): + if isinstance(array_type(np.zeros((1, 1))), DaskArray): hash_func = tokenize else: hash_func = joblib.hash @@ -311,8 +308,9 @@ def test_set_varm(adata): # TODO: Determine if this is the intended behavior, # or just the behaviour we’ve had for a while @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_not_set_subset_X(matrix_type_base, subset_func): - adata = ad.AnnData(matrix_type_base(asarray(sparse.random(20, 20)))) +@pytest.mark.array_type(skip=Flags.Gpu | Flags.Disk | Flags.Dask) +def test_not_set_subset_X(array_type: ArrayType, subset_func) -> None: + adata = ad.AnnData(array_type(sparse.random(20, 20, format="csr"))) init_hash = joblib.hash(adata) orig_X_val = adata.X.copy() while True: @@ -330,7 +328,7 @@ def test_not_set_subset_X(matrix_type_base, subset_func): with pytest.warns(ad.ImplicitModificationWarning, match=r".*X.*"): subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(adata.X != orig_X_val)) + assert not np.any(to_dense(adata.X != orig_X_val)) assert init_hash == joblib.hash(adata) assert isinstance(subset.X, type(adata.X)) @@ -339,8 +337,9 @@ def test_not_set_subset_X(matrix_type_base, subset_func): # TODO: Determine if this is the intended behavior, # or just the behaviour we’ve had for a while @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): - adata = ad.AnnData(matrix_type_no_gpu(asarray(sparse.random(20, 20)))) +@pytest.mark.array_type(skip=Flags.Gpu | Flags.Disk) +def test_not_set_subset_X_dask(array_type: ArrayType, subset_func) -> None: + adata = ad.AnnData(array_type(sparse.random(20, 20, format="csr"))) init_hash = tokenize(adata) orig_X_val = adata.X.copy() while True: @@ -358,15 +357,16 @@ 
def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): with pytest.warns(ad.ImplicitModificationWarning, match=r".*X.*"): subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(adata.X != orig_X_val)) + assert not np.any(to_dense(adata.X != orig_X_val)) assert init_hash == tokenize(adata) assert isinstance(subset.X, type(adata.X)) @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_set_scalar_subset_X(matrix_type, subset_func): - adata = ad.AnnData(matrix_type(np.zeros((10, 10)))) +@pytest.mark.array_type(skip={Flags.Disk, *CUPY_SPARSE, *DASK_SPARRAY}) +def test_set_scalar_subset_X(array_type: ArrayType, subset_func): + adata = ad.AnnData(array_type(np.zeros((10, 10)))) orig_X_val = adata.X.copy() subset_idx = subset_func(adata.obs_names) @@ -375,15 +375,15 @@ def test_set_scalar_subset_X(matrix_type, subset_func): adata_subset.X = 1 assert adata_subset.is_view - assert np.all(asarray(adata[subset_idx, :].X) == 1) + assert np.all(to_dense(adata[subset_idx, :].X) == 1) if isinstance(adata.X, CupyCSCMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 - assert asarray(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul( + assert to_dense(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul( *adata_subset.shape ) else: - assert asarray(orig_X_val != adata.X).sum() == mul(*adata_subset.shape) + assert to_dense(orig_X_val != adata.X).sum() == mul(*adata_subset.shape) # TODO: Use different kind of subsetting for adata and view @@ -527,8 +527,9 @@ def test_layers_view(): # TODO: This can be flaky. 
Make that stop -def test_view_of_view(matrix_type, subset_func, subset_func2): - adata = gen_adata((30, 15), X_type=matrix_type) +@pytest.mark.array_type(skip={Flags.Disk, *DASK_SPARRAY, *CUPY_SPARSE}) +def test_view_of_view(array_type: ArrayType, subset_func, subset_func2) -> None: + adata = gen_adata((30, 15), X_type=array_type) adata.raw = adata.copy() if subset_func is single_subset: pytest.xfail("Other subset generating functions have trouble with this") @@ -563,9 +564,9 @@ def test_view_of_view_modification(): adata.X = sparse.csr_matrix(adata.X) adata[0, :][:, 5:].X = np.ones(5) * 2 - assert np.all(asarray(adata.X)[0, 5:] == np.ones(5) * 2) + assert np.all(to_dense(adata.X)[0, 5:] == np.ones(5) * 2) adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2)) * 2 - assert np.all(asarray(adata.X)[1:3, 1:3] == np.ones((2, 2)) * 2) + assert np.all(to_dense(adata.X)[1:3, 1:3] == np.ones((2, 2)) * 2) def test_double_index(subset_func, subset_func2): @@ -575,13 +576,14 @@ def test_double_index(subset_func, subset_func2): v1 = adata[obs_subset, var_subset] v2 = adata[obs_subset, :][:, var_subset] - assert np.all(asarray(v1.X) == asarray(v2.X)) + assert np.all(to_dense(v1.X) == to_dense(v2.X)) assert np.all(v1.obs == v2.obs) assert np.all(v1.var == v2.var) -def test_view_different_type_indices(matrix_type): - orig = gen_adata((30, 30), X_type=matrix_type) +@pytest.mark.array_type(skip={Flags.Disk, *CUPY_SPARSE, *DASK_SPARRAY}) +def test_view_different_type_indices(array_type: ArrayType) -> None: + orig = gen_adata((30, 30), X_type=array_type) boolean_array_mask = np.random.randint(0, 2, 30).astype("bool") boolean_list_mask = boolean_array_mask.tolist() integer_array_mask = np.where(boolean_array_mask)[0] @@ -699,26 +701,21 @@ def test_deepcopy_subset(adata, spmat: type): np.testing.assert_array_equal(adata.obsp["spmat"].shape, (10, 10)) -array_type = [ - asarray, - sparse.csr_matrix, - sparse.csc_matrix, - sparse.csr_array, - sparse.csc_array, -] - - -# 
https://github.com/scverse/anndata/issues/680 -@pytest.mark.parametrize("array_type", array_type) +@pytest.mark.array_type(skip=Flags.Gpu | Flags.Disk | Flags.Dask) @pytest.mark.parametrize("attr", ["X", "layers", "obsm", "varm", "obsp", "varp"]) -def test_view_mixin_copies_data(adata, array_type: type, attr): +def test_view_mixin_copies_data( + adata: ad.AnnData, + array_type: ArrayType[np.ndarray | CSMatrix | CSArray], + attr: Literal["X", "layers", "obsm", "varm", "obsp", "varp"], +) -> None: + """See https://github.com/scverse/anndata/issues/680""" N = 100 adata = ad.AnnData( obs=pd.DataFrame(index=np.arange(N).astype(str)), var=pd.DataFrame(index=np.arange(N).astype(str)), ) - X = array_type(sparse.eye(N, N).multiply(np.arange(1, N + 1))) + X = array_type(sparse.eye(N, N).multiply(np.arange(1, N + 1)).toarray()) if attr == "X": adata.X = X else: @@ -733,7 +730,7 @@ def test_view_mixin_copies_data(adata, array_type: type, attr): arr_view_copy = arr_view.copy() - if sparse.issparse(X): + if isinstance(X, CSMatrix | CSArray): assert not np.shares_memory(arr_view.indices, arr_view_copy.indices) assert not np.shares_memory(arr_view.indptr, arr_view_copy.indptr) assert not np.shares_memory(arr_view.data, arr_view_copy.data) @@ -789,12 +786,13 @@ def test_dataframe_view_index_setting(): assert a2.obs.index.values.tolist() == ["a", "b"] +@pytest.mark.array_type(skip=Flags.Disk) def test_ellipsis_index( ellipsis_index: tuple[EllipsisType | slice, ...] 
| EllipsisType, equivalent_ellipsis_index: tuple[slice, slice], - matrix_type, + array_type: ArrayType, ): - adata = gen_adata((10, 10), X_type=matrix_type, **GEN_ADATA_DASK_ARGS) + adata = gen_adata((10, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) subset_ellipsis = adata[ellipsis_index] subset = adata[equivalent_ellipsis_index] assert_equal(subset_ellipsis, subset) diff --git a/tests/test_x.py b/tests/test_x.py index 16e6f672f..e41740d98 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -2,38 +2,46 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import pandas as pd import pytest +from fast_array_utils.conv import to_dense from scipy import sparse import anndata as ad from anndata import AnnData from anndata._warnings import ImplicitModificationWarning from anndata.tests.helpers import assert_equal, gen_adata -from anndata.utils import asarray - -UNLABELLED_ARRAY_TYPES = [ - pytest.param(sparse.csr_matrix, id="csr"), - pytest.param(sparse.csc_matrix, id="csc"), - pytest.param(sparse.csr_array, id="csr_array"), - pytest.param(sparse.csc_array, id="csc_array"), - pytest.param(asarray, id="ndarray"), -] +from testing.fast_array_utils import SUPPORTED_TYPES, Flags + +if TYPE_CHECKING: + from anndata.compat import CSArray, CSMatrix + from testing.fast_array_utils import ArrayType + + +NON_BASIC_FLAGS = Flags.Gpu | Flags.Disk | Flags.Dask SINGULAR_SHAPES = [ pytest.param(shape, id=str(shape)) for shape in [(1, 10), (10, 1), (1, 1)] ] +@pytest.mark.array_type(skip=NON_BASIC_FLAGS) @pytest.mark.parametrize("shape", SINGULAR_SHAPES) -@pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES) -@pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) -def test_setter_singular_dim(shape, orig_array_type, new_array_type): +@pytest.mark.parametrize( + "new_array_type", [t for t in SUPPORTED_TYPES if not (t.flags & NON_BASIC_FLAGS)] +) +def test_setter_singular_dim( + shape, + array_type: ArrayType[np.ndarray 
| CSMatrix | CSArray], + new_array_type: ArrayType[np.ndarray | CSMatrix | CSArray], +) -> None: # https://github.com/scverse/anndata/issues/500 - adata = gen_adata(shape, X_type=orig_array_type) + adata = gen_adata(shape, X_type=array_type) to_assign = new_array_type(np.ones(shape)) adata.X = to_assign - np.testing.assert_equal(asarray(adata.X), 1) + np.testing.assert_equal(to_dense(adata.X), 1) assert isinstance(adata.X, type(to_assign)) @@ -48,10 +56,15 @@ def test_repeat_indices_view(): subset.X = mat -@pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES) -@pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) -def test_setter_view(orig_array_type, new_array_type): - adata = gen_adata((10, 10), X_type=orig_array_type) +@pytest.mark.array_type(skip=NON_BASIC_FLAGS) +@pytest.mark.parametrize( + "new_array_type", [t for t in SUPPORTED_TYPES if not (t.flags & NON_BASIC_FLAGS)] +) +def test_setter_view( + array_type: ArrayType[np.ndarray | CSMatrix | CSArray], + new_array_type: ArrayType[np.ndarray | CSMatrix | CSArray], +) -> None: + adata = gen_adata((10, 10), X_type=array_type) orig_X = adata.X to_assign = new_array_type(np.ones((9, 9))) if isinstance(orig_X, np.ndarray) and sparse.issparse(to_assign): @@ -59,7 +72,7 @@ def test_setter_view(orig_array_type, new_array_type): pytest.xfail("Cannot set a dense array with a sparse array") view = adata[:9, :9] view.X = to_assign - np.testing.assert_equal(asarray(view.X), np.ones((9, 9))) + np.testing.assert_equal(to_dense(view.X), np.ones((9, 9))) assert isinstance(view.X, type(orig_X))