Skip to content

[FEA] dask.array isn't supported by rsc.pp.neighbors #456

@dicklim

Description

@dicklim

Describe the bug
It seems that rapids_singlecell does not support Dask arrays when running rsc.pp.neighbors.

Steps/Code to reproduce bug

def set_mem():
    rmm.reinitialize(managed_memory=True)
    cp.cuda.set_allocator(rmm_cupy_allocator)

preprocessing_gpus="0,1"
cluster = LocalCUDACluster(CUDA_VISIBLE_DEVICES=preprocessing_gpus)
client = Client(cluster)
client.run(set_mem)
client

alldata = sc.read_h5ad("/home/dd/DL/trainLog/2508_AL_Eval/Round_stage1_epoch140_trainLog_all_sc.hdf5")
SPARSE_CHUNK_SIZE = 20_000
shape = alldata.X.shape
# alldata.X = read_dask(alldata.X, (SPARSE_CHUNK_SIZE, shape[1]))
alldata.X = da.from_array(alldata.X, chunks=(SPARSE_CHUNK_SIZE, shape[1]))

rsc.get.anndata_to_GPU(alldata)
alldata.X = alldata.X.persist()
alldata.X.compute_chunk_sizes()
Image
rsc.pp.highly_variable_genes(alldata, min_mean=0.0125, max_mean=3, min_disp=0.5, n_top_genes = 2048)
sc.pl.highly_variable_genes(alldata)

alldata = alldata[:, alldata.var.highly_variable]

rsc.pp.pca(alldata, svd_solver='covariance_eigh', n_comps=256)
sc.pl.pca_variance_ratio(alldata, log=False, n_pcs=30)

All of the above runs fine; the error occurs in the next step.
I first tried rsc.pp.neighbors:

rsc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')

and the error is

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[12], line 1
----> 1 rsc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
      2 rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
      3      n_components=2, maxiter=None, alpha=1.0,
      4      gamma=1.0, negative_sample_rate=5, init_pos='spectral',
      5      random_state=0)
      6 sc.pl.umap(alldata)

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/rapids_singlecell/preprocessing/_neighbors.py:531, in neighbors(adata, n_neighbors, n_pcs, use_rep, random_state, algorithm, metric, metric_kwds, algorithm_kwds, key_added, copy)
    528     adata._init_as_actual(adata.copy())
    529 X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
--> 531 X_contiguous = _check_neighbors_X(X, algorithm)
    532 _check_metrics(algorithm, metric)
    534 n_obs = adata.shape[0]

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/rapids_singlecell/preprocessing/_neighbors.py:330, in _check_neighbors_X(X, algorithm)
    328         X_contiguous = cp.ascontiguousarray(X, dtype=np.float32)
    329     else:
--> 330         raise TypeError(
    331             "Unsupported type for X. Expected ndarray or sparse matrix."
    332         )
    334 return X_contiguous

TypeError: Unsupported type for X. Expected ndarray or sparse matrix

Then I tried using sc.pp.neighbors instead, followed by rsc.tl.umap,
and this failed as well (the traceback points at the sc.pp.neighbors call):

sc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
     n_components=2, maxiter=None, alpha=1.0,
     gamma=1.0, negative_sample_rate=5, init_pos='spectral',
     random_state=0)
--------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 sc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
      2 rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
      3      n_components=2, maxiter=None, alpha=1.0,
      4      gamma=1.0, negative_sample_rate=5, init_pos='spectral',
      5      random_state=0)
      6 sc.pl.umap(alldata)

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/scanpy/neighbors/__init__.py:194, in neighbors(adata, n_neighbors, n_pcs, use_rep, knn, method, transformer, metric, metric_kwds, random_state, key_added, copy)
    192     adata._init_as_actual(adata.copy())
    193 neighbors = Neighbors(adata)
--> 194 neighbors.compute_neighbors(
    195     n_neighbors,
    196     n_pcs=n_pcs,
    197     use_rep=use_rep,
    198     knn=knn,
    199     method=method,
    200     transformer=transformer,
    201     metric=metric,
    202     metric_kwds=metric_kwds,
    203     random_state=random_state,
    204 )
    206 if key_added is None:
    207     key_added = "neighbors"

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/scanpy/neighbors/__init__.py:587, in Neighbors.compute_neighbors(self, n_neighbors, n_pcs, use_rep, knn, method, transformer, metric, metric_kwds, random_state)
    585 self.knn = knn
    586 X = _choose_representation(self._adata, use_rep=use_rep, n_pcs=n_pcs)
--> 587 self._distances = transformer.fit_transform(X)
    588 knn_indices, knn_distances = _get_indices_distances_from_sparse_matrix(
    589     self._distances, n_neighbors
    590 )
    591 if shortcut:
    592     # self._distances is a sparse matrix with a diag of 1, fix that

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> 316     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:2256, in PyNNDescentTransformer.fit_transform(self, X, y, **fit_params)
   2236 def fit_transform(self, X, y=None, **fit_params):
   2237     """Fit to graph_data, then transform it.
   2238 
   2239     Fits transformer to X and y with optional parameters fit_params
   (...)   2254         The diagonal is always explicit.
   2255     """
-> 2256     self.fit(X, compress_index=False)
   2257     result = self.transform(X=None)
   2259     if self.verbose:

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:2174, in PyNNDescentTransformer.fit(self, X, compress_index)
   2170 # Compatibility with sklearn, which doesn't consider
   2171 # a point its own neighbor for these purposes.
   2172 effective_n_neighbors = self.n_neighbors + 1
-> 2174 self.index_ = NNDescent(
   2175     X,
   2176     metric=self.metric,
   2177     metric_kwds=metric_kwds,
   2178     n_neighbors=effective_n_neighbors,
   2179     n_trees=self.n_trees,
   2180     leaf_size=self.leaf_size,
   2181     pruning_degree_multiplier=self.pruning_degree_multiplier,
   2182     diversify_prob=self.diversify_prob,
   2183     n_search_trees=self.n_search_trees,
   2184     tree_init=self.tree_init,
   2185     random_state=self.random_state,
   2186     low_memory=self.low_memory,
   2187     max_candidates=self.max_candidates,
   2188     n_iters=self.n_iters,
   2189     delta=self.early_termination_value,
   2190     n_jobs=self.n_jobs,
   2191     compressed=compress_index,
   2192     parallel_batch_queries=self.parallel_batch_queries,
   2193     verbose=self.verbose,
   2194 )
   2196 return self

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:736, in NNDescent.__init__(self, data, metric, metric_kwds, n_neighbors, n_trees, leaf_size, pruning_degree_multiplier, diversify_prob, n_search_trees, tree_init, init_graph, init_dist, random_state, low_memory, max_candidates, max_rptree_depth, n_iters, delta, n_jobs, compressed, parallel_batch_queries, verbose)
    734     self._input_dtype = np.uint8
    735 else:
--> 736     data = check_array(data, dtype=np.float32, accept_sparse="csr", order="C")
    737     self._input_dtype = np.float32
    739 self._raw_data = data

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/validation.py:1053, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
   1051         array = xp.astype(array, dtype, copy=False)
   1052     else:
-> 1053         array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
   1054 except ComplexWarning as complex_warning:
   1055     raise ValueError(
   1056         "Complex data not supported\n{}\n".format(array)
   1057     ) from complex_warning

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/_array_api.py:757, in _asarray_with_order(array, dtype, order, copy, xp, device)
    755     array = numpy.array(array, order=order, dtype=dtype)
    756 else:
--> 757     array = numpy.asarray(array, order=order, dtype=dtype)
    759 # At this point array is a NumPy ndarray. We convert it to an array
    760 # container that is consistent with the input's namespace.
    761 return xp.asarray(array)

File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/dask/array/core.py:1737, in Array.__array__(self, dtype, copy, **kwargs)
   1729 x = self.compute()
   1731 # Apply requested dtype and convert non-numpy backends to numpy.
   1732 # If copy is True, numpy is going to perform its own deep copy
   1733 # after this method returns.
   1734 # If copy is None, finalize() ensures that the returned object
   1735 # does not share memory with an object stored in the graph or on a
   1736 # process-local Worker.
-> 1737 return np.asarray(x, dtype=dtype)

File cupy/_core/core.pyx:1581, in cupy._core.core._ndarray_base.__array__()

TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly

I referred to the doc:
https://rapids-singlecell.readthedocs.io/en/latest/notebooks/06-multi_gpu_show.html
https://rapids-singlecell.readthedocs.io/en/latest/notebooks/05_out-of-core.html

But they all stop after running PCA. I think this is because I am using a dask.array, which may not be supported by sc.pp.neighbors. However, I don't know how to solve this.

Are there any solutions or suggestions? Or are there any available examples of the subsequent steps that I can follow?

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions