Skip to content

Commit 0bc2b39

Browse files
(feat): allow reading of scipy.sparse.cs{c,r}_array (#1633)
* (chore): export `read_elem` and `write_elem` from the main package * (chore): pr number * (fix): agnostic way of importing * (fix): add `RWAble` to `api.md` * (fix): `md` file import * (fix): clarify public backed sparse docstring/api * (chore): small fixes * (fix): `format` + `to_memory` * (chore): remove deprecation tests + `SparseDataset` * (chore): clean up private/public api * (fix): `test_append_overflow_check` used `indptr` * (fix): export `InMemoryElem` * (chore): release note * (chore): move `InMemoryElem` to the "extras" section * Update src/anndata/_core/sparse_dataset.py * (fix): remove dead tests * (feat): allow reading of `scipy.sparse.cs{c,r}_array` * (chore): release note * (fix): add how to do it to release note * (fix): make setting dynamic * (fix): handle min-deps * (fix): don't know why that didn't commit... * (chore): better `msg` * (refactor): change validation to setting object * (fix): add space --------- Co-authored-by: Philipp A. <flying-sheep@web.de>
1 parent 53537b5 commit 0bc2b39

6 files changed

Lines changed: 87 additions & 4 deletions

File tree

docs/release-notes/1633.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow reading sparse data (via {func}`~anndata.read_elem` or {func}`~anndata.experimental.sparse_dataset`) into either {class}`scipy.sparse.csr_array` or {class}`scipy.sparse.csc_array` by setting {attr}`anndata.settings.shall_use_sparse_array_on_read` to `True` {user}`ilan-gold`

src/anndata/_core/sparse_dataset.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from anndata._core.index import _fix_slice_bounds
3030
from anndata.compat import H5Group, ZarrArray, ZarrGroup
3131

32+
from .._settings import settings
3233
from ..compat import SpArray, _read_attr
3334

3435
try:
@@ -236,6 +237,8 @@ def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix:
236237
FORMATS = [
237238
BackedFormat("csr", backed_csr_matrix, ss.csr_matrix),
238239
BackedFormat("csc", backed_csc_matrix, ss.csc_matrix),
240+
BackedFormat("csr", backed_csr_matrix, ss.csr_array),
241+
BackedFormat("csc", backed_csc_matrix, ss.csc_array),
239242
]
240243

241244

@@ -444,7 +447,9 @@ def __getitem__(
444447
# If indexing is array x array it returns a backed_sparse_matrix
445448
# Not sure what the performance is on that operation
446449
if isinstance(sub, BackedSparseMatrix):
447-
return get_memory_class(self.format)(sub)
450+
return get_memory_class(
451+
self.format, use_sparray_in_io=settings.shall_use_sparse_array_on_read
452+
)(sub)
448453
else:
449454
return sub
450455

@@ -582,7 +587,9 @@ def to_memory(self) -> spmatrix | SpArray:
582587
-------
583588
The in-memory representation of the sparse matrix.
584589
"""
585-
format_class = get_memory_class(self.format)
590+
format_class = get_memory_class(
591+
self.format, use_sparray_in_io=settings.shall_use_sparse_array_on_read
592+
)
586593
mtx = format_class(self.shape, dtype=self.dtype)
587594
mtx.data = self.group["data"][...]
588595
mtx.indices = self.group["indices"][...]

src/anndata/_settings.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from types import GenericAlias
1515
from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar, cast
1616

17+
from anndata.compat import CAN_USE_SPARSE_ARRAY
1718
from anndata.compat.exceptiongroups import add_note
1819

1920
if TYPE_CHECKING:
@@ -396,7 +397,7 @@ def __doc__(self):
396397
##################################################################################
397398

398399

399-
def validate_bool(val) -> None:
400+
def validate_bool(val: Any) -> None:
400401
if not isinstance(val, bool):
401402
msg = f"{val} not valid boolean"
402403
raise TypeError(msg)
@@ -428,5 +429,24 @@ def validate_bool(val) -> None:
428429
get_from_env=check_and_get_bool,
429430
)
430431

432+
433+
def validate_sparse_settings(val: Any) -> None:
    """Validate a value for the ``shall_use_sparse_array_on_read`` setting.

    Parameters
    ----------
    val
        Candidate value for the setting.

    Raises
    ------
    TypeError
        If ``val`` is not a boolean (raised by ``validate_bool``).
    ValueError
        If ``val`` is ``True`` while ``CAN_USE_SPARSE_ARRAY`` is false.
    """
    validate_bool(val)
    # `val` is a bool from here on, so it can be used directly as a condition.
    if val and not CAN_USE_SPARSE_ARRAY:
        # This raises rather than falling back, so the message must not
        # promise a fallback. The first sentence is kept verbatim because
        # callers may match on it.
        msg = (
            "scipy.sparse.cs{r,c}array is not available in current scipy version. "
            "Cannot enable reading into sparse arrays; "
            "leave this setting as False to read into scipy.sparse.spmatrix."
        )
        raise ValueError(msg)
441+
442+
443+
# Opt-in switch consulted when sparse data is read; `validate_sparse_settings`
# rejects `True` when `CAN_USE_SPARSE_ARRAY` is false.
settings.register(
    "shall_use_sparse_array_on_read",
    default_value=False,
    description=(
        "Whether or not to use `scipy.sparse.cs{r,c}_array` rather than "
        "`scipy.sparse.cs{r,c}_matrix` as the default class when reading in sparse data"
    ),
    validate=validate_sparse_settings,
    get_from_env=check_and_get_bool,
)
450+
431451
##################################################################################
432452
##################################################################################

tests/test_backed_sparse.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,28 @@ def test_dataset_append_memory(
269269
assert_equal(fromdisk, frommem)
270270

271271

272+
@pytest.mark.skipif(
    not CAN_USE_SPARSE_ARRAY, reason="scipy.sparse.cs{r,c}array not available"
)
@pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix])
def test_read_array(
    tmp_path: Path,
    sparse_format: Callable[[ArrayLike], sparse.spmatrix],
    diskfmt: Literal["h5ad", "zarr"],
):
    """Check that `shall_use_sparse_array_on_read` controls the class of a backed read.

    A random 100x100 sparse matrix is written to disk, opened with
    `sparse_dataset`, and fully indexed once with the setting enabled
    (expecting an `SpArray`) and once with it disabled (expecting a
    `scipy.sparse.spmatrix`).
    """
    path = tmp_path / f"test.{diskfmt.replace('ad', '')}"
    a = sparse_format(sparse.random(100, 100))
    f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a")
    # The setting is global state: remember it so a failing assertion cannot
    # leak a modified value into other tests.
    previous = ad.settings.shall_use_sparse_array_on_read
    try:
        ad.write_elem(f, "mtx", a)
        diskmtx = sparse_dataset(f["mtx"])
        ad.settings.shall_use_sparse_array_on_read = True
        assert isinstance(diskmtx[...], SpArray)
        ad.settings.shall_use_sparse_array_on_read = False
        assert isinstance(diskmtx[...], sparse.spmatrix)
    finally:
        ad.settings.shall_use_sparse_array_on_read = previous
        # Only the h5py branch returns a file handle that needs closing.
        if diskfmt != "zarr":
            f.close()
292+
293+
272294
@pytest.mark.parametrize(
273295
("sparse_format", "append_method"),
274296
[

tests/test_io_elementwise.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
write_elem,
2626
)
2727
from anndata._io.specs.registry import IORegistryError
28-
from anndata.compat import ZarrGroup, _read_attr
28+
from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray, ZarrGroup, _read_attr
2929
from anndata.tests.helpers import (
3030
as_cupy,
3131
as_cupy_sparse_dask_array,
@@ -35,6 +35,7 @@
3535
)
3636

3737
if TYPE_CHECKING:
38+
from pathlib import Path
3839
from typing import Literal, TypeVar
3940

4041
from anndata.compat import H5Group
@@ -570,3 +571,22 @@ def test_io_pd_cow(store, copy_on_write):
570571
write_elem(store, "adata", orig)
571572
from_store = read_elem(store["adata"])
572573
assert_equal(orig, from_store)
574+
575+
576+
@pytest.mark.skipif(
    not CAN_USE_SPARSE_ARRAY, reason="scipy.sparse.cs{r,c}array not available"
)
def test_read_sparse_array(
    tmp_path: Path,
    sparse_format: Literal["csr", "csc"],
    diskfmt: Literal["h5ad", "zarr"],
):
    """Check that `read_elem` returns an `SpArray` when the setting is enabled.

    A random 100x100 sparse matrix in `sparse_format` is written to disk and
    read back with `shall_use_sparse_array_on_read` set to `True`.
    """
    path = tmp_path / f"test.{diskfmt.replace('ad', '')}"
    a = sparse.random(100, 100, format=sparse_format)
    f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a")
    # The setting is global state: restore it afterwards so that later tests
    # are not silently switched to sparse arrays.
    previous = ad.settings.shall_use_sparse_array_on_read
    try:
        ad.write_elem(f, "mtx", a)
        ad.settings.shall_use_sparse_array_on_read = True
        mtx = ad.read_elem(f["mtx"])
        assert isinstance(mtx, SpArray)
    finally:
        ad.settings.shall_use_sparse_array_on_read = previous
        # Only the h5py branch returns a file handle that needs closing.
        if diskfmt != "zarr":
            f.close()

tests/test_settings.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,16 @@ class TestEnum(Enum):
243243
)
244244
def test_describe(as_rst: bool, expected: str, settings: SettingsManager):
245245
assert settings.describe("test_var_3", as_rst=as_rst) == expected
246+
247+
248+
def test_shall_use_sparse_array_on_read():
    """Check that enabling the setting works only when sparse arrays are available.

    Without sparse-array support, assigning `True` must raise `ValueError`.
    With support, the assignment must succeed and be readable back.
    """
    import re

    import anndata as ad

    if not ad.compat.CAN_USE_SPARSE_ARRAY:
        # Escape the message: it contains `.` and `{r,c}`, which are regex
        # metacharacters and should be matched literally.
        with pytest.raises(
            ValueError,
            match=re.escape(
                "scipy.sparse.cs{r,c}array is not available in current scipy version"
            ),
        ):
            ad.settings.shall_use_sparse_array_on_read = True
        return

    # The setting is global state: restore it so other tests are unaffected.
    previous = ad.settings.shall_use_sparse_array_on_read
    try:
        ad.settings.shall_use_sparse_array_on_read = True
        assert ad.settings.shall_use_sparse_array_on_read is True
    finally:
        ad.settings.shall_use_sparse_array_on_read = previous

0 commit comments

Comments
 (0)