Skip to content

Commit 8fafce8

Browse files
authored
Merge pull request #41 from quadbio/docs/improve_docstrings
Decrease docstring duplication
2 parents 0ab898e + a5db587 commit 8fafce8

16 files changed

+728
-297
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ classifiers = [
2424
dynamic = [ "version" ]
2525
dependencies = [
2626
"anndata>=0.11",
27+
"docrep",
2728
"numpy",
2829
"packaging",
2930
"pandas",
@@ -56,6 +57,7 @@ optional-dependencies.doc = [
5657
]
5758
optional-dependencies.test = [
5859
"coverage",
60+
"faiss-cpu",
5961
"pytest",
6062
"squidpy>=1.6",
6163
]

src/cellmapper/_docs.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Shared documentation for cellmapper."""
2+
3+
from docrep import DocstringProcessor
4+
5+
__all__ = ["d"]
6+
7+
_t = """\
8+
t
9+
Number of diffusion time steps. This parameter controls the degree of
10+
smoothing applied by the diffusion operator. Larger values lead to more
11+
smoothing."""
12+
13+
_diffusion_method = """\
14+
diffusion_method
15+
Method for computing powers of the mapping matrix (only valid in self-mapping mode). Options are "iterative" for
16+
iterative matrix multiplication (inspired by MAGIC :cite:`van2018recovering`) and "spectral" for
17+
eigendecomposition-based approach. """
18+
19+
_prediction_postfix = """\
20+
prediction_postfix
21+
Postfix to add to mapped variables to identify them as predictions."""
22+
23+
_symmetrize = """\
24+
symmetrize
25+
If True, create a symmetric connectivity matrix. Only valid for square matrices (self-mapping).
26+
If None (default), uses True for self-mapping and False for cross-mapping."""
27+
28+
_self_edges = """\
29+
self_edges
30+
Control self-edges (diagonal entries) for square matrices (self-mapping).
31+
If None (default), uses False for self-mapping (scanpy style) and None for cross-mapping.
32+
This controls whether or not the kernel used to compute the connectivities is supplied with self-edges.
33+
It does not determine whether the final connectivity matrix has self edges. For example, the `umap`
34+
kernel expects self-edges, but does not produce them in the final connectivity matrix."""
35+
36+
_knn_method = """\
37+
knn_method
38+
Method for computing k-nearest neighbors. Options include:
39+
- "sklearn": Scikit-learn's NearestNeighbors. See https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html
40+
- "pynndescent": Pynndescent's approximate nearest neighbors. See https://pynndescent.readthedocs.io/en/latest/
41+
- "rapids": RAPIDS cuML's NearestNeighbors (GPU). See https://docs.rapids.ai/api/cuml/stable/api.html#cuml.neighbors.NearestNeighbors
42+
- "faiss-cpu": Facebook AI Similarity Search (FAISS) on CPU. See https://faiss.ai/
43+
- "faiss-gpu": Facebook AI Similarity Search (FAISS) on GPU. See https://faiss.ai/
44+
45+
46+
All methods return exactly `n_neighbors` neighbors, including the reference cell itself (in self-mapping mode)."""
47+
48+
_only_yx = """\
49+
only_yx
50+
If True, only compute the yx neighbors. In self-mapping mode, this is
51+
automatically set to True for efficiency since all neighbor matrices contain the same information.
52+
This is faster, but not suitable for Jaccard or HNOCA methods in cross-mapping mode."""
53+
54+
_kernel_method = """\
55+
kernel_method
56+
Method to use for computing the mapping matrix. Options include:
57+
58+
- "jaccard": Jaccard similarity. Inspired by GLUE :cite:`cao2022multi`
59+
- "gauss": Gaussian kernel with (global) bandwidth equal to the mean distance.
60+
- "scarches": scArches kernel. Inspired by scArches :cite:`lotfollahi2022mapping`
61+
- "inverse_distance": Inverse distance kernel.
62+
- "random": Random kernel, useful for testing.
63+
- "hnoca": HNOCA kernel. Inspired by HNOCA-tools :cite:`he2024integrated`
64+
- "equal": All neighbors are equally weighted (1/n_neighbors).
65+
- "umap": UMAP fuzzy simplicial set connectivities. Only available for self-mapping with true k-NN graphs."""
66+
67+
_comparison_method = """\
68+
comparison_method
69+
Method to use for comparing the mapping results. Options include:
70+
71+
- "pearson": Pearson correlation coefficient.
72+
- "spearman": Spearman rank correlation coefficient.
73+
- "js": Jensen-Shannon divergence.
74+
- "rmse": Root Mean Square Error."""
75+
76+
_layer_key = """\
77+
layer_key
78+
Key in `self.query.layers` to use as the original expression. Use "X" to use `self.query.X`."""
79+
80+
81+
_n_neighbors = """\
82+
n_neighbors
83+
Number of nearest neighbors. This parameter controls the sparsity of the connectivity matrix. """
84+
85+
_use_rep = """\
86+
use_rep
87+
Data representation based on which to find nearest neighbors. If None, a fallback representation will be
88+
computed automatically. """
89+
90+
_knn_dist_metric = """\
91+
knn_dist_metric
92+
Distance metric to use for nearest neighbors. See the knn algorithms documentation for details. """
93+
94+
95+
d = DocstringProcessor(
96+
t=_t,
97+
diffusion_method=_diffusion_method,
98+
prediction_postfix=_prediction_postfix,
99+
symmetrize=_symmetrize,
100+
self_edges=_self_edges,
101+
knn_method=_knn_method,
102+
only_yx=_only_yx,
103+
kernel_method=_kernel_method,
104+
comparison_method=_comparison_method,
105+
layer_key=_layer_key,
106+
n_neighbors=_n_neighbors,
107+
use_rep=_use_rep,
108+
knn_dist_metric=_knn_dist_metric,
109+
)

src/cellmapper/check.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,18 @@ def check(self) -> None:
6161
"https://docs.rapids.ai/install/.",
6262
cupy="To speed up k-NN search on GPU, you may install cuPy following the guide from "
6363
"https://docs.rapids.ai/install/.",
64-
faiss="To speed up k-NN search on GPU, you may install faiss following the guide from "
64+
faiss_cpu="To speed up k-NN search on CPU, you may install faiss following the guide from "
65+
"https://github.com/facebookresearch/faiss/blob/main/INSTALL.md",
66+
faiss_gpu="To speed up k-NN search on GPU, you may install faiss following the guide from "
6567
"https://github.com/facebookresearch/faiss/blob/main/INSTALL.md",
6668
pynndescent="To use fast approximate k-NN search, install pynndescent: pip install pynndescent",
6769
)
6870

6971
CHECKERS = {
7072
"cuml": Checker("cuml", vmin=None, install_hint=INSTALL_HINTS.cuml),
7173
"cupy": Checker("cupy", vmin=None, install_hint=INSTALL_HINTS.cupy),
72-
"faiss": Checker("faiss", package_name="faiss", vmin="1.7.0", install_hint=INSTALL_HINTS.faiss),
74+
"faiss-cpu": Checker("faiss", package_name="faiss-cpu", vmin="1.7.0", install_hint=INSTALL_HINTS.faiss_cpu),
75+
"faiss-gpu": Checker("faiss", package_name="faiss", vmin="1.7.0", install_hint=INSTALL_HINTS.faiss_gpu),
7376
"pynndescent": Checker("pynndescent", vmin=None, install_hint=INSTALL_HINTS.pynndescent),
7477
}
7578

src/cellmapper/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ class PackageConstants:
66
SKLEARN_WARNING_CUTOFF: int = 50000
77

88
# Default mapping methods
9-
DEFAULT_SELF_MAPPING_METHOD: str = "umap"
10-
DEFAULT_CROSS_MAPPING_METHOD: str = "gauss"
9+
DEFAULT_SELF_MAPPING_KERNEL_METHOD: str = "umap"
10+
DEFAULT_CROSS_MAPPING_KERNEL_METHOD: str = "gauss"
1111

1212
# Kernel method categories
1313
JACCARD_BASED_KERNELS = {"jaccard", "hnoca"}

src/cellmapper/model/_knn_backend.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,44 @@ def query(self, points: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray]:
5050
return distances, indices
5151

5252

53-
class _FaissBackend(_KNNBackend):
53+
class _FaissCpuBackend(_KNNBackend):
    """k-NN backend that searches a flat FAISS L2 index on the CPU.

    The index is built in :meth:`fit` and searched in :meth:`query`.

    NOTE(review): the FAISS documentation describes ``IndexFlatL2`` distances as
    *squared* Euclidean distances -- confirm that downstream kernels expect that.
    """

    def __init__(
        self,
        n_neighbors: int,
        metric: str,
        random_state: int = 0,
        **kwargs: Any,
    ):
        """Verify that FAISS is installed and set up an empty (unfitted) backend.

        Parameters
        ----------
        n_neighbors, metric, random_state, **kwargs
            Accepted so that the constructor can be called with the same keyword
            arguments as ``get_backend`` passes to every backend. They are not
            stored or used by this backend; the index type is always ``IndexFlatL2``.
        """
        check_deps("faiss-cpu")
        # Imported only after the dependency check so a missing package is
        # reported through `check_deps` rather than as a bare ImportError.
        import faiss

        self.faiss = faiss
        self._index = None  # set by `fit`

    @staticmethod
    def _as_float32(array: np.ndarray) -> np.ndarray:
        """Return `array` as a C-contiguous float32 array, copying only if required."""
        return np.ascontiguousarray(array, dtype=np.float32)

    def fit(self, data: np.ndarray) -> None:
        """Build a flat L2 index over the rows of `data`.

        Parameters
        ----------
        data
            2-D array; ``data.shape[1]`` is used as the index dimensionality.
        """
        dims = data.shape[1]
        index = self.faiss.IndexFlatL2(dims)
        index.add(self._as_float32(data))
        self._index = index

    def query(self, points: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray]:
        """Search the fitted index for the `k` nearest neighbors of each row in `points`.

        Returns
        -------
        The ``(distances, indices)`` pair produced by the index's ``search`` call.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self._index is None:
            raise RuntimeError("The FAISS index has not been built yet; call `fit` before `query`.")
        distances, indices = self._index.search(self._as_float32(points), k)
        return distances, indices
80+
81+
82+
class _FaissGpuBackend(_KNNBackend):
83+
def __init__(
84+
self,
85+
n_neighbors: int,
86+
metric: str,
87+
random_state: int = 0,
88+
**kwargs: Any,
89+
):
90+
check_deps("faiss-gpu")
6291
import faiss
6392

6493
self.faiss = faiss
@@ -69,11 +98,15 @@ def fit(self, data: np.ndarray) -> None:
6998
dims = data.shape[1]
7099
flat = self.faiss.IndexFlatL2(dims)
71100
gpu_index = self.faiss.index_cpu_to_gpu(self.res, 0, flat)
72-
gpu_index.add(data.astype(np.float32))
101+
# Ensure data is float32 and C-contiguous
102+
data_f32 = np.ascontiguousarray(data.astype(np.float32))
103+
gpu_index.add(data_f32)
73104
self._index = gpu_index
74105

75106
def query(self, points: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray]:
76-
distances, indices = self._index.search(points.astype(np.float32), k)
107+
# Ensure points are float32 and C-contiguous
108+
points_f32 = np.ascontiguousarray(points.astype(np.float32))
109+
distances, indices = self._index.search(points_f32, k)
77110
return distances, indices
78111

79112

@@ -148,16 +181,17 @@ def query(self, points: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray]:
148181

149182
_BACKENDS = {
150183
"sklearn": _SklearnBackend,
151-
"faiss": _FaissBackend,
184+
"faiss-cpu": _FaissCpuBackend,
185+
"faiss-gpu": _FaissGpuBackend,
152186
"rapids": _RapidsBackend,
153187
"pynndescent": _PyNNDescentBackend,
154188
}
155189

156190

157-
def get_backend(knn_method: str, n_neighbors: int, metric: str, random_state: int = 0, **kwargs: Any) -> _KNNBackend:
    """Factory to get a configured KNN backend.

    Parameters
    ----------
    knn_method
        Key into ``_BACKENDS`` selecting the backend class.
    n_neighbors, metric, random_state, **kwargs
        Forwarded unchanged, as keyword arguments, to the backend constructor.

    Returns
    -------
    An instance of the selected backend class.

    Raises
    ------
    ValueError
        If `knn_method` is not a key of ``_BACKENDS``.
    """
    try:
        backend_cls = _BACKENDS[knn_method]
    except KeyError as err:
        # Chain from the caught exception instance. `from KeyError` would attach a
        # fresh, empty KeyError() as the cause and lose the failed lookup key.
        raise ValueError(f"Unknown method: {knn_method}. Supported methods: {list(_BACKENDS)}") from err
    return backend_cls(n_neighbors=n_neighbors, metric=metric, random_state=random_state, **kwargs)

0 commit comments

Comments
 (0)