Skip to content

Commit 53537b5

Browse files
(fix): clarify public backed sparse docstring/api (#1608)
* (chore): export `read_elem` and `write_elem` from the main package
* (chore): pr number
* (fix): agnostic way of importing
* (fix): add `RWAble` to `api.md`
* (fix): `md` file import
* (fix): clarify public backed sparse docstring/api
* (chore): small fixes
* (fix): `format` + `to_memory`
* (chore): remove deprecation tests + `SparseDataset`
* (chore): clean up private/public api
* (fix): `test_append_overflow_check` used `indptr`
* (fix): export `InMemoryElem`
* (chore): release note
* (chore): move `InMemoryElem` to the "extras" section
* Update src/anndata/_core/sparse_dataset.py
* (fix): remove dead tests

---------

Co-authored-by: Philipp A. <flying-sheep@web.de>
1 parent 94b2304 commit 53537b5

3 files changed

Lines changed: 41 additions & 91 deletions

File tree

src/anndata/_core/sparse_dataset.py

Lines changed: 40 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
from collections.abc import Sequence
4545
from typing import Literal
4646

47+
from scipy.sparse import spmatrix
48+
4749
from .._types import GroupStorageType
4850
from .index import Index
4951

@@ -353,9 +355,8 @@ def is_sparse_indexing_overridden(format: Literal["csr", "csc"], row, col):
353355

354356

355357
class BaseCompressedSparseDataset(ABC):
356-
"""Analogous to :class:`h5py.Dataset <h5py:Dataset>` or `zarr.Array`, but for sparse matrices."""
357-
358358
format: Literal["csr", "csc"]
359+
"""The format of the sparse matrix."""
359360
_group: GroupStorageType
360361

361362
def __init__(self, group: GroupStorageType):
@@ -378,6 +379,7 @@ def group(self, val):
378379

379380
@property
380381
def backend(self) -> Literal["zarr", "hdf5"]:
382+
"""Which file type is used on-disk."""
381383
if isinstance(self.group, ZarrGroup):
382384
return "zarr"
383385
elif isinstance(self.group, H5Group):
@@ -387,6 +389,7 @@ def backend(self) -> Literal["zarr", "hdf5"]:
387389

388390
@property
389391
def dtype(self) -> np.dtype:
392+
"""The :class:`numpy.dtype` of the `data` attribute of the sparse matrix."""
390393
return self.group["data"].dtype
391394

392395
@classmethod
@@ -395,37 +398,19 @@ def _check_group_format(cls, group):
395398
assert group_format == cls.format
396399

397400
@property
398-
def format_str(self) -> Literal["csr", "csc"]:
399-
"""DEPRECATED Use .format instead."""
400-
warnings.warn(
401-
"The attribute .format_str is deprecated and will be removed in the anndata 0.11.0. "
402-
"Please use .format instead.",
403-
FutureWarning,
404-
)
405-
return self.format
406-
407-
@property
408-
def name(self) -> str:
401+
def _name(self) -> str:
402+
"""Name of the group."""
409403
return self.group.name
410404

411405
@property
412406
def shape(self) -> tuple[int, int]:
407+
"""Shape of the matrix read off disk."""
413408
shape = _read_attr(self.group.attrs, "shape", None)
414409
if shape is None:
415410
# TODO warn
416411
shape = self.group.attrs.get("h5sparse_shape")
417412
return tuple(map(int, shape))
418413

419-
@property
420-
def value(self) -> ss.csr_matrix | ss.csc_matrix:
421-
"""DEPRECATED Use .to_memory() instead."""
422-
warnings.warn(
423-
"The .value attribute is deprecated and will be removed in the anndata 0.11.0. "
424-
"Please use .to_memory() instead.",
425-
FutureWarning,
426-
)
427-
return self.to_memory()
428-
429414
def __repr__(self) -> str:
430415
return f"{type(self).__name__}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}"
431416

@@ -483,7 +468,25 @@ def __setitem__(self, index: Index | tuple[()], value) -> None:
483468
mock_matrix[row, col] = value
484469

485470
# TODO: split to other classes?
486-
def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
471+
def append(self, sparse_matrix: spmatrix | SpArray) -> None:
472+
"""Append an in-memory or on-disk sparse matrix to the current object's store.
473+
474+
Parameters
475+
----------
476+
sparse_matrix
477+
The matrix to append.
478+
479+
Raises
480+
------
481+
NotImplementedError
482+
If the matrix to append is not one of :class:`~scipy.sparse.csr_array`, :class:`~scipy.sparse.csc_array`, :class:`~scipy.sparse.csr_matrix`, or :class:`~scipy.sparse.csc_matrix`.
483+
ValueError
484+
If the on-disk and to-append matrices are not of the same format, i.e., both `csr` or both `csc`.
485+
OverflowError
486+
If the underlying data store has a 32-bit `indptr` and the new matrix is too large to fit in it, i.e., appending would require a 64-bit `indptr` to be written.
487+
AssertionError
488+
If the on-disk data does not have `csc` or `csr` format.
489+
"""
487490
# Prep variables
488491
shape = self.shape
489492
if isinstance(sparse_matrix, BaseCompressedSparseDataset):
@@ -546,7 +549,7 @@ def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
546549
)
547550
# Clear cached property
548551
if hasattr(self, "indptr"):
549-
del self.indptr
552+
del self._indptr
550553

551554
# indices
552555
indices = self.group["indices"]
@@ -555,7 +558,7 @@ def append(self, sparse_matrix: _cs_matrix | SpArray) -> None:
555558
indices[orig_data_size:] = sparse_matrix.indices
556559

557560
@cached_property
558-
def indptr(self) -> np.ndarray:
561+
def _indptr(self) -> np.ndarray:
559562
"""\
560563
Other than `data` and `indices`, this is only as long as the major axis
561564
@@ -569,21 +572,29 @@ def _to_backed(self) -> BackedSparseMatrix:
569572
mtx = format_class(self.shape, dtype=self.dtype)
570573
mtx.data = self.group["data"]
571574
mtx.indices = self.group["indices"]
572-
mtx.indptr = self.indptr
575+
mtx.indptr = self._indptr
573576
return mtx
574577

575-
def to_memory(self) -> ss.csr_matrix | ss.csc_matrix:
578+
def to_memory(self) -> spmatrix | SpArray:
579+
"""Returns an in-memory representation of the sparse matrix.
580+
581+
Returns
582+
-------
583+
The in-memory representation of the sparse matrix.
584+
"""
576585
format_class = get_memory_class(self.format)
577586
mtx = format_class(self.shape, dtype=self.dtype)
578587
mtx.data = self.group["data"][...]
579588
mtx.indices = self.group["indices"][...]
580-
mtx.indptr = self.indptr
589+
mtx.indptr = self._indptr
581590
return mtx
582591

583592

584593
_sparse_dataset_doc = """\
585594
On disk {format} sparse matrix.
586595
596+
Analogous to :class:`h5py.Dataset` or :class:`zarr.core.Array`, but for sparse matrices.
597+
587598
Parameters
588599
----------
589600
group
@@ -662,30 +673,3 @@ def sparse_dataset(group: GroupStorageType) -> CSRDataset | CSCDataset:
662673
@_subset.register(BaseCompressedSparseDataset)
663674
def subset_sparsedataset(d, subset_idx):
664675
return d[subset_idx]
665-
666-
667-
## Backwards compat
668-
669-
_sparsedataset_depr_msg = """\
670-
SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.
671-
672-
For instance checks, use `isinstance(X, (anndata.CSRDataset, anndata.CSCDataset))` instead.
673-
674-
For creation, use `anndata.experimental.sparse_dataset(X)` instead.
675-
"""
676-
677-
678-
class SparseDataset(ABC):
679-
"""DEPRECATED.
680-
681-
Use CSRDataset, CSCDataset, and sparse_dataset from anndata.experimental instead.
682-
"""
683-
684-
def __new__(cls, group):
685-
warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=2)
686-
return sparse_dataset(group)
687-
688-
@classmethod
689-
def __subclasshook__(cls, C):
690-
warnings.warn(FutureWarning(_sparsedataset_depr_msg), stacklevel=3)
691-
return issubclass(C, (CSRDataset, CSCDataset))

tests/test_backed_sparse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ def test_append_overflow_check(group_fn, sparse_class, tmpdir):
549549
backed = sparse_dataset(group["mtx"])
550550

551551
# Checking for correct caching behaviour
552-
backed.indptr
552+
backed._indptr
553553

554554
with pytest.raises(
555555
OverflowError,

tests/test_deprecations.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import h5py
1212
import numpy as np
1313
import pytest
14-
import zarr
1514
from scipy import sparse
1615

1716
import anndata as ad
@@ -129,39 +128,6 @@ def test_deprecated_read(tmp_path):
129128
assert_equal(memory, from_disk)
130129

131130

132-
def test_deprecated_sparse_dataset_values():
133-
import zarr
134-
135-
from anndata.experimental import sparse_dataset
136-
137-
mtx = sparse.random(50, 50, format="csr")
138-
g = zarr.group()
139-
140-
ad.write_elem(g, "mtx", mtx)
141-
mtx_backed = sparse_dataset(g["mtx"])
142-
143-
with pytest.warns(FutureWarning, match=r"Please use .to_memory()"):
144-
mtx_backed.value
145-
146-
with pytest.warns(FutureWarning, match=r"Please use .format"):
147-
mtx_backed.format_str
148-
149-
150-
def test_deprecated_sparse_dataset():
151-
from anndata._core.sparse_dataset import SparseDataset
152-
153-
mem_X = sparse.random(50, 50, format="csr")
154-
g = zarr.group()
155-
ad.write_elem(g, "X", mem_X)
156-
with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
157-
X = SparseDataset(g["X"])
158-
159-
assert isinstance(X, ad.CSRDataset)
160-
161-
with pytest.warns(FutureWarning, match=r"SparseDataset is deprecated"):
162-
assert isinstance(X, SparseDataset)
163-
164-
165131
@pytest.mark.parametrize("name", anndata.experimental._DEPRECATED)
166132
def test_warn_on_import_from_experimental(name: str):
167133
with pytest.warns(FutureWarning, match=rf"Importing {name}"):

0 commit comments

Comments
 (0)