Improve docstrings for neighbors algorithm options

Intron7 · Intron7 · commit 4676c0707e80 · 2025-09-23T13:16:18.000+02:00
diff --git a/src/rapids_singlecell/preprocessing/_neighbors/__init__.py b/src/rapids_singlecell/preprocessing/_neighbors/__init__.py
@@ -169,22 +169,40 @@ def neighbors(
     metric_kwds
         Options for the metric.
     algorithm_kwds
-        Options for the algorithm. For 'ivfflat' and 'ivfpq' algorithms, the following
-        parameters can be specified:
+        Options for the algorithm.
+        For 'ivfflat' and 'ivfpq' algorithms, the following parameters can be specified:
+
         * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
+
         * 'n_probes': Number of lists to probe during search. Default is 20. Higher values
         increase accuracy but reduce speed.
+
         For 'nn_descent' algorithm, the following parameters can be specified:
+
         * 'intermediate_graph_degree': The degree of the intermediate graph. Default is None.
         It is recommended to set it to `>= 1.5 * n_neighbors`.
+
         For 'all_neighbors' algorithm, the following parameters can be specified:
+
         * 'algo': The algorithm to use. Valid options are: 'ivf_pq' and 'nn_descent'. Default is 'nn_descent'.
-        * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
+
+        * 'n_clusters': Number of clusters/batches to partition the dataset into (must be > overlap_factor). Default is the number of GPUs.
+
+        * 'overlap_factor': Number of clusters each point is assigned to (must be < n_clusters). Default is 1.
+
+        * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)). Only available for 'ivf_pq' algorithm.
+
         * 'n_probes': Number of lists to probe during search. Default is 20. Higher values
-        increase accuracy but reduce speed.
+          increase accuracy but reduce speed. Only available for 'ivf_pq' algorithm.
+
+        * 'intermediate_graph_degree': The degree of the intermediate graph. Default is None. It is recommended to set it to `>= 1.5 * n_neighbors`. Only available for 'nn_descent' algorithm.
+
         For 'mg_ivfflat' and 'mg_ivfpq' algorithms, the following parameters can be specified:
-        * 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'distributed'. Default is 'replicated'.
+
+        * 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'shared'. Default is 'replicated'.
+
         * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
+
         * 'n_probes': Number of lists to probe during search. Default is 20. Higher values
         increase accuracy but reduce speed.
 
@@ -337,6 +355,12 @@ def bbknn(
         `'cagra'`
             Employs the Compressed, Accurate Graph-based search to quickly find nearest neighbors by traversing a graph structure.
 
+        `'mg_ivfflat'`
+            Uses multi-GPU inverted file indexing to partition the dataset into coarse quantizer cells and performs the search within the relevant cells.
+
+        `'mg_ivfpq'`
+            Combines Multi-GPU inverted file indexing with product quantization to encode sub-vectors of the dataset, facilitating faster distance computation.
+
         Please ensure that the chosen algorithm is compatible with your dataset and the specific requirements of your search problem.
     metric
         A known metric's name or a callable that returns a distance.
@@ -349,6 +373,16 @@ def bbknn(
         * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
         * 'nprobes': Number of lists to probe during search. Default is 1. Higher values
           increase accuracy but reduce speed.
+
+        For 'mg_ivfflat' and 'mg_ivfpq' algorithms, the following parameters can be specified:
+
+        * 'distribution_mode': The distribution mode to use. Valid options are: 'replicated' and 'shared'. Default is 'replicated'.
+
+        * 'n_lists': Number of inverted lists for IVF indexing. Default is 2 * next_power_of_2(sqrt(n_samples)).
+
+        * 'n_probes': Number of lists to probe during search. Default is 20. Higher values
+          increase accuracy but reduce speed.
+
     trim
         Trim the neighbours of each cell to these many top connectivities.
         May help with population independence and improve the tidiness of clustering.
diff --git a/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_all_neighbors.py b/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_all_neighbors.py
@@ -14,8 +14,8 @@
 
 
 def _all_neighbors_knn(
-    X: cp.ndarray,
-    Y: cp.ndarray,
+    X: np.ndarray,
+    Y: np.ndarray,
     k: int,
     *,
     metric: _Metrics,
@@ -71,7 +71,7 @@ def _all_neighbors_knn(
     neighbors = cp.zeros([X.shape[0], k], dtype=np.int64)
     distances = cp.zeros([X.shape[0], k], dtype=np.float32)
 
-    neighbors, distances = all_neighbors.build(
+    all_neighbors.build(
         dataset=X,
         k=k,
         params=build_params,
diff --git a/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfflat.py b/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfflat.py
@@ -9,12 +9,14 @@
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
+    import numpy as np
+
     from rapids_singlecell.preprocessing._neighbors import _Metrics
 
 
 def _mg_ivf_flat_knn(
-    X: cp.ndarray,
-    Y: cp.ndarray,
+    X: np.ndarray,
+    Y: np.ndarray,
     k: int,
     *,
     metric: _Metrics,
@@ -29,6 +31,7 @@ def _mg_ivf_flat_knn(
             "Please update your cuvs installation."
         )
     distribution_mode = algorithm_kwds.get("distribution_mode", "replicated")
+    assert distribution_mode in ["replicated", "shared"], "Invalid distribution mode"
     n_lists = algorithm_kwds.get("n_lists", _compute_nlist(X.shape[0]))
     n_probes = algorithm_kwds.get("n_probes", 20)
     # Build multi-GPU index
diff --git a/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfpq.py b/src/rapids_singlecell/preprocessing/_neighbors/_algorithms/_mg_ivfpq.py
@@ -9,12 +9,14 @@
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
+    import numpy as np
+
     from rapids_singlecell.preprocessing._neighbors import _Metrics
 
 
 def _mg_ivf_pq_knn(
-    X: cp.ndarray,
-    Y: cp.ndarray,
+    X: np.ndarray,
+    Y: np.ndarray,
     k: int,
     *,
     metric: _Metrics,