Skip to content

Commit 8266b98

Browse files
authored
add sparse support, rename r to R, improve typing (#103)
* add sparse support Signed-off-by: zethson <lukas.heumos@posteo.net> * fix repr tests Signed-off-by: zethson <lukas.heumos@posteo.net> * shape back Signed-off-by: zethson <lukas.heumos@posteo.net> * fix tests Signed-off-by: zethson <lukas.heumos@posteo.net> * intersphinx Signed-off-by: zethson <lukas.heumos@posteo.net> * remove sparse test Signed-off-by: zethson <lukas.heumos@posteo.net> --------- Signed-off-by: zethson <lukas.heumos@posteo.net>
1 parent 6c1917e commit 8266b98

17 files changed

Lines changed: 400 additions & 175 deletions

File tree

.readthedocs.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# https://docs.readthedocs.io/en/stable/config-file/v2.html
21
version: 2
32
build:
43
os: ubuntu-24.04
@@ -10,5 +9,4 @@ build:
109
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH uv pip install .[doc]
1110
sphinx:
1211
configuration: docs/conf.py
13-
# disable this for more lenient docs builds
1412
fail_on_warning: true

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@ and this project adheres to [Semantic Versioning][].
1313
### Added
1414

1515
- Expanded documentation
16-
- Bugfixes
1716
- Improved OMOP Extraction
17+
- Support for [COO sparse matrices](https://github.com/pydata/sparse) for R
18+
19+
### Breaking changes
20+
21+
- Renamed `r` to `R`; the value of the `R_LAYER_KEY` constant changed from `"r_layer"` to `"R_layer"` accordingly
1822

1923
## [0.0.1] - 2024-11-04
2024

docs/conf.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@
150150
("py:class", "numpy.int64"),
151151
# For now not in public facing API
152152
("py:class", "awkward.highlevel.Array"),
153+
("py:class", "h5py._hl.dataset.Dataset"),
154+
("py:class", "zarr.core.Array"),
155+
("py:class", "ehrdata._compat.ZappyArray"),
156+
("py:class", "dask.array.core.Array"),
157+
("py:class", "anndata.compat.CupyArray"),
158+
("py:class", "anndata.compat.CupySparseMatrix"),
159+
("py:class", "sparse.numba_backend._coo.core.COO"),
153160
]
154161

155162
# Redirect broken parameter annotation classes

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,28 @@ readme = "README.md"
1010
license = "Apache-2.0"
1111
maintainers = [
1212
{ name = "Eljas Roellin", email = "eljas.roellin@helmholtz-munich.de" },
13+
{ name = "Lukas Heumos", email = "lukas.heumos@posteo.net" },
1314
]
1415
authors = [
1516
{ name = "Eljas Roellin, Lukas Heumos, Xinyue Zhang" },
1617
]
17-
requires-python = ">=3.10"
18+
requires-python = ">=3.11,<3.14"
1819
classifiers = [
1920
"Programming Language :: Python :: 3 :: Only",
20-
"Programming Language :: Python :: 3.10",
2121
"Programming Language :: Python :: 3.11",
2222
"Programming Language :: Python :: 3.12",
2323
"Programming Language :: Python :: 3.13",
2424
]
2525
dependencies = [
2626
"anndata",
27-
"awkward",
2827
"duckdb",
2928
"filelock",
3029
# for debug logging (referenced from the issue template)
3130
"lamin-utils",
3231
"requests",
3332
"rich",
3433
"session-info2",
34+
"sparse",
3535
"xarray",
3636
]
3737
optional-dependencies.dev = [

src/ehrdata/_compat.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
from __future__ import annotations
2+
3+
from functools import cache
4+
from importlib.util import find_spec
5+
from types import EllipsisType, ModuleType
6+
from typing import TYPE_CHECKING
7+
from warnings import warn
8+
9+
import h5py
10+
import numpy as np
11+
import pandas as pd
12+
import scipy
13+
14+
if TYPE_CHECKING:
15+
pass
16+
17+
#############################
18+
# scipy sparse array compat #
19+
#############################
20+
21+
22+
CSMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix
23+
CSArray = scipy.sparse.csr_array | scipy.sparse.csc_array
24+
25+
26+
class Empty:
    """Class that defines no attributes or methods of its own."""
28+
29+
30+
Index1D = slice | int | str | np.int64 | np.ndarray | pd.Series
31+
IndexRest = Index1D | EllipsisType
32+
Index = (
33+
IndexRest
34+
| tuple[Index1D, IndexRest]
35+
| tuple[IndexRest, Index1D]
36+
| tuple[Index1D, Index1D, EllipsisType]
37+
| tuple[EllipsisType, Index1D, Index1D]
38+
| tuple[Index1D, EllipsisType, Index1D]
39+
| CSMatrix
40+
| CSArray
41+
)
42+
H5Group = h5py.Group
43+
H5Array = h5py.Dataset
44+
H5File = h5py.File
45+
46+
47+
#############################
48+
# Optional deps
49+
#############################
50+
@cache
def is_zarr_v2() -> bool:
    """Report whether the installed ``zarr`` is older than version 3.0.0.

    ``zarr`` and ``packaging`` are imported inside the function body, and the
    result is memoised by ``functools.cache``.
    """
    import zarr
    from packaging.version import Version

    installed = Version(zarr.__version__)
    first_v3_release = Version("3.0.0")
    return installed < first_v3_release
56+
57+
58+
# Module-level check, executed when this module is imported: if the installed
# zarr is older than 3.0.0, emit a DeprecationWarning asking users to upgrade.
if is_zarr_v2():
59+
msg = "anndata will no longer support zarr v2 in the near future. Please prepare to upgrade to zarr>=3."
60+
warn(msg, DeprecationWarning, stacklevel=2)
61+
62+
63+
# Bind AwkArray to the real awkward.Array when awkward can be located (or
# while type checking); otherwise define a stand-in class of the same name.
# NOTE(review): presumably so the name can be referenced without awkward
# installed - confirm against callers.
if find_spec("awkward") or TYPE_CHECKING:
64+
import awkward # noqa: F401
65+
from awkward import Array as AwkArray
66+
else:
67+
68+
# Stand-in used when awkward is not installed; its repr is a fixed label.
class AwkArray:
69+
@staticmethod
70+
def __repr__():
71+
return "mock awkward.highlevel.Array"
72+
73+
74+
# Bind ZappyArray to zappy.base.ZappyArray when zappy can be located (or while
# type checking); otherwise define a stand-in class of the same name.
if find_spec("zappy") or TYPE_CHECKING:
75+
from zappy.base import ZappyArray
76+
else:
77+
78+
# Stand-in used when zappy is not installed; its repr is a fixed label.
class ZappyArray:
79+
@staticmethod
80+
def __repr__():
81+
return "mock zappy.base.ZappyArray"
82+
83+
84+
# Bind DaskArray three ways: from dask.array.core for type checkers, from
# dask.array at runtime when dask can be located, and as a stand-in class
# otherwise.
if TYPE_CHECKING:
85+
# type checkers are confused and can only see …core.Array
86+
from dask.array.core import Array as DaskArray
87+
elif find_spec("dask"):
88+
from dask.array import Array as DaskArray
89+
else:
90+
91+
# Stand-in used when dask is not installed; its repr is a fixed label.
class DaskArray:
92+
@staticmethod
93+
def __repr__():
94+
return "mock dask.array.core.Array"
95+
96+
97+
# https://github.com/scverse/anndata/issues/1749
98+
def is_cupy_importable() -> bool:
    """Return ``True`` if ``import cupy`` succeeds and ``False`` on ``ImportError``.

    The import is actually attempted rather than only located.
    """
    try:
        import cupy  # noqa: F401
    except ImportError:
        importable = False
    else:
        importable = True
    return importable
104+
105+
106+
# Bind the cupy array / sparse-matrix names to the real cupy classes when cupy
# imports successfully (or while type checking); otherwise define stand-in
# classes with the same names.
if is_cupy_importable() or TYPE_CHECKING:
107+
from cupy import ndarray as CupyArray
108+
from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix
109+
from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix
110+
from cupyx.scipy.sparse import spmatrix as CupySparseMatrix
111+
112+
# If dask.array is importable as well, register the cupy CSR/CSC matrix
# classes as dask chunk types; a missing dask is silently ignored.
try:
113+
import dask.array as da
114+
except ImportError:
115+
pass
116+
else:
117+
da.register_chunk_type(CupyCSRMatrix)
118+
da.register_chunk_type(CupyCSCMatrix)
119+
else:
120+
121+
# Stand-ins used when cupy cannot be imported; each repr is a fixed label
# naming the class it replaces.
class CupySparseMatrix:
122+
@staticmethod
123+
def __repr__():
124+
return "mock cupyx.scipy.sparse.spmatrix"
125+
126+
class CupyCSRMatrix:
127+
@staticmethod
128+
def __repr__():
129+
return "mock cupyx.scipy.sparse.csr_matrix"
130+
131+
class CupyCSCMatrix:
132+
@staticmethod
133+
def __repr__():
134+
return "mock cupyx.scipy.sparse.csc_matrix"
135+
136+
class CupyArray:
137+
@staticmethod
138+
def __repr__():
139+
return "mock cupy.ndarray"
140+
141+
142+
def lazy_import_torch() -> ModuleType:
    """Import the optional ``torch`` dependency on demand and return it.

    Returns
    -------
    The imported ``torch`` module.

    Raises
    ------
    ImportError
        If ``torch`` cannot be imported. The message points to the
        ``ehrdata[torch]`` extra.
    """
    try:
        import torch
    except ImportError:
        # `from None` suppresses the original exception context so the user
        # only sees the installation hint.
        raise ImportError(
            "The optional module 'torch' is not installed. Please install it using 'pip install ehrdata[torch]'."
        ) from None
    return torch

src/ehrdata/_types.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from typing import Literal, TypeAlias
2+
3+
import numpy as np
4+
import scipy
5+
from anndata import abc
6+
from anndata.compat import CupyArray, CupySparseMatrix, DaskArray, H5Array, H5Group, ZarrArray, ZarrGroup
7+
from numpy import ma
8+
from sparse import COO
9+
10+
from ehrdata._compat import ZappyArray
11+
12+
ArrayStorageType: TypeAlias = ZarrArray | H5Array
13+
GroupStorageType: TypeAlias = ZarrGroup | H5Group
14+
StorageType: TypeAlias = ArrayStorageType | GroupStorageType
15+
16+
CSMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix
17+
CSArray = scipy.sparse.csr_array | scipy.sparse.csc_array
18+
19+
XDataType: TypeAlias = (
20+
np.ndarray
21+
| ma.MaskedArray
22+
| CSMatrix
23+
| CSArray
24+
| H5Array
25+
| ZarrArray
26+
| ZappyArray
27+
| abc.CSRDataset
28+
| abc.CSCDataset
29+
| DaskArray
30+
| CupyArray
31+
| CupySparseMatrix
32+
)
33+
34+
RDataType: TypeAlias = np.ndarray | COO | DaskArray
35+
36+
EHRDataElem = Literal[
37+
"obs",
38+
"var",
39+
"t",
40+
"obsm",
41+
"varm",
42+
"obsp",
43+
"varp",
44+
"layers",
45+
"X",
46+
"R",
47+
"raw",
48+
"uns",
49+
]
50+
51+
Join_T = Literal["inner", "outer"]

src/ehrdata/core/_compat.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/ehrdata/core/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
R_LAYER_KEY = "r_layer"
1+
R_LAYER_KEY = "R_layer"

0 commit comments

Comments
 (0)