Skip to content

Commit 4676c07

Browse files
committed
add better docs string
1 parent 6efdacf commit 4676c07

4 files changed

Lines changed: 51 additions & 12 deletions

File tree

src/rapids_singlecell/preprocessing/_neighbors/__init__.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,22 +169,40 @@ def neighbors(
169169
metric_kwds
170170
Options for the metric.
171171
algorithm_kwds
172-
Options for the algorithm. For 'ivfflat' and 'ivfpq' algorithms, the following
173-
parameters can be specified:
172+
Options for the algorithm.
173+
For 'ivfflat' and 'ivfpq' algorithms, the following parameters can be specified:
174+
174175
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
176+
175177
* 'n_probes': Number of lists to probe during search. Default is 20. Higher values
176178
increase accuracy but reduce speed.
179+
177180
For 'nn_descent' algorithm, the following parameters can be specified:
181+
178182
* 'intermediate_graph_degree': The degree of the intermediate graph. Default is None.
179183
It is recommended to set it to `>= 1.5 * n_neighbors`.
184+
180185
For 'all_neighbors' algorithm, the following parameters can be specified:
186+
181187
* 'algo': The algorithm to use. Valid options are: 'ivf_pq' and 'nn_descent'. Default is 'nn_descent'.
182-
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
188+
189+
* 'n_clusters': Number of clusters/batches to partition the dataset into (must be > overlap_factor). Default is number of GPUs.
190+
191+
* 'overlap_factor': Number of clusters each point is assigned to (must be < n_clusters). Default is 1.
192+
193+
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)). Only used when 'algo' is 'ivf_pq'.
194+
183195
* 'n_probes': Number of lists to probe during search. Default is 20. Higher values
184-
increase accuracy but reduce speed.
196+
increase accuracy but reduce speed. Only used when 'algo' is 'ivf_pq'.
197+
198+
* 'intermediate_graph_degree': The degree of the intermediate graph. Default is None. It is recommended to set it to `>= 1.5 * n_neighbors`. Only used when 'algo' is 'nn_descent'.
199+
185200
For 'mg_ivfflat' and 'mg_ivfpq' algorithms, the following parameters can be specified:
186-
* 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'distributed'. Default is 'replicated'.
201+
202+
* 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'shared'. Default is 'replicated'.
203+
187204
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
205+
188206
* 'n_probes': Number of lists to probe during search. Default is 20. Higher values
189207
increase accuracy but reduce speed.
190208
@@ -337,6 +355,12 @@ def bbknn(
337355
`'cagra'`
338356
Employs the Compressed, Accurate Graph-based search to quickly find nearest neighbors by traversing a graph structure.
339357
358+
`'mg_ivfflat'`
359+
Uses Multi-GPU inverted file indexing to partition the dataset into coarse quantizer cells and performs the search within the relevant cells.
360+
361+
`'mg_ivfpq'`
362+
Combines Multi-GPU inverted file indexing with product quantization to encode sub-vectors of the dataset, facilitating faster distance computation.
363+
340364
Please ensure that the chosen algorithm is compatible with your dataset and the specific requirements of your search problem.
341365
metric
342366
A known metric's name or a callable that returns a distance.
@@ -349,6 +373,16 @@ def bbknn(
349373
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
350374
* 'nprobes': Number of lists to probe during search. Default is 1. Higher values
351375
increase accuracy but reduce speed.
376+
377+
For 'mg_ivfflat' and 'mg_ivfpq' algorithms, the following parameters can be specified:
378+
379+
* 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'shared'. Default is 'replicated'.
380+
381+
* 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
382+
383+
* 'n_probes': Number of lists to probe during search. Default is 20. Higher values
384+
increase accuracy but reduce speed.
385+
352386
trim
353387
Trim the neighbours of each cell to these many top connectivities.
354388
May help with population independence and improve the tidiness of clustering.

src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_all_neighbors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515

1616
def _all_neighbors_knn(
17-
X: cp.ndarray,
18-
Y: cp.ndarray,
17+
X: np.ndarray,
18+
Y: np.ndarray,
1919
k: int,
2020
*,
2121
metric: _Metrics,
@@ -71,7 +71,7 @@ def _all_neighbors_knn(
7171
neighbors = cp.zeros([X.shape[0], k], dtype=np.int64)
7272
distances = cp.zeros([X.shape[0], k], dtype=np.float32)
7373

74-
neighbors, distances = all_neighbors.build(
74+
all_neighbors.build(
7575
dataset=X,
7676
k=k,
7777
params=build_params,

src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfflat.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
if TYPE_CHECKING:
1010
from collections.abc import Mapping
1111

12+
import numpy as np
13+
1214
from rapids_singlecell.preprocessing._neighbors import _Metrics
1315

1416

1517
def _mg_ivf_flat_knn(
16-
X: cp.ndarray,
17-
Y: cp.ndarray,
18+
X: np.ndarray,
19+
Y: np.ndarray,
1820
k: int,
1921
*,
2022
metric: _Metrics,
@@ -29,6 +31,7 @@ def _mg_ivf_flat_knn(
2931
"Please update your cuvs installation."
3032
)
3133
distribution_mode = algorithm_kwds.get("distribution_mode", "replicated")
34+
assert distribution_mode in ["replicated", "shared"], "Invalid distribution mode"
3235
n_lists = algorithm_kwds.get("n_lists", _compute_nlist(X.shape[0]))
3336
n_probes = algorithm_kwds.get("n_probes", 20)
3437
# Build multi-GPU index

src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfpq.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
if TYPE_CHECKING:
1010
from collections.abc import Mapping
1111

12+
import numpy as np
13+
1214
from rapids_singlecell.preprocessing._neighbors import _Metrics
1315

1416

1517
def _mg_ivf_pq_knn(
16-
X: cp.ndarray,
17-
Y: cp.ndarray,
18+
X: np.ndarray,
19+
Y: np.ndarray,
1820
k: int,
1921
*,
2022
metric: _Metrics,

0 commit comments

Comments
 (0)