Describe the bug
It seems that rapids_singlecell does not support Dask arrays when running rsc.pp.neighbors.
Steps/Code to reproduce bug
import cupy as cp
import dask.array as da
import rmm
import rapids_singlecell as rsc
import scanpy as sc
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from rmm.allocators.cupy import rmm_cupy_allocator

def set_mem():
    # use RMM managed memory on every worker
    rmm.reinitialize(managed_memory=True)
    cp.cuda.set_allocator(rmm_cupy_allocator)

preprocessing_gpus = "0,1"
cluster = LocalCUDACluster(CUDA_VISIBLE_DEVICES=preprocessing_gpus)
client = Client(cluster)
client.run(set_mem)
client

alldata = sc.read_h5ad("/home/dd/DL/trainLog/2508_AL_Eval/Round_stage1_epoch140_trainLog_all_sc.hdf5")

SPARSE_CHUNK_SIZE = 20_000
shape = alldata.X.shape
# alldata.X = read_dask(alldata.X, (SPARSE_CHUNK_SIZE, shape[1]))
alldata.X = da.from_array(alldata.X, chunks=(SPARSE_CHUNK_SIZE, shape[1]))
rsc.get.anndata_to_GPU(alldata)
alldata.X = alldata.X.persist()
alldata.X.compute_chunk_sizes()

rsc.pp.highly_variable_genes(alldata, min_mean=0.0125, max_mean=3, min_disp=0.5, n_top_genes=2048)
sc.pl.highly_variable_genes(alldata)
alldata = alldata[:, alldata.var.highly_variable]
rsc.pp.pca(alldata, svd_solver='covariance_eigh', n_comps=256)
sc.pl.pca_variance_ratio(alldata, log=False, n_pcs=30)
Everything above runs fine, and then the error occurs.
I first tried rsc.pp.neighbors:
rsc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
and the error is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[12], line 1
----> 1 rsc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
2 rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
3 n_components=2, maxiter=None, alpha=1.0,
4 gamma=1.0, negative_sample_rate=5, init_pos='spectral',
5 random_state=0)
6 sc.pl.umap(alldata)
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/rapids_singlecell/preprocessing/_neighbors.py:531, in neighbors(adata, n_neighbors, n_pcs, use_rep, random_state, algorithm, metric, metric_kwds, algorithm_kwds, key_added, copy)
528 adata._init_as_actual(adata.copy())
529 X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs)
--> 531 X_contiguous = _check_neighbors_X(X, algorithm)
532 _check_metrics(algorithm, metric)
534 n_obs = adata.shape[0]
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/rapids_singlecell/preprocessing/_neighbors.py:330, in _check_neighbors_X(X, algorithm)
328 X_contiguous = cp.ascontiguousarray(X, dtype=np.float32)
329 else:
--> 330 raise TypeError(
331 "Unsupported type for X. Expected ndarray or sparse matrix."
332 )
334 return X_contiguous
TypeError: Unsupported type for X. Expected ndarray or sparse matrix
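I assume the failure is that alldata.obsm["X_pca"] produced by the Dask-based PCA is still a Dask array (backed by CuPy chunks), while _check_neighbors_X only accepts a CuPy ndarray or sparse matrix. A workaround I have been considering (untested sketch; it assumes the representation lives in obsm["X_pca"]) is to materialize it into a single CuPy array before calling neighbors:
import cupy as cp
import dask.array as da

# untested sketch: collapse the chunked, CuPy-backed PCA representation
# into one CuPy ndarray so that rsc.pp.neighbors can consume it
rep = alldata.obsm["X_pca"]
if isinstance(rep, da.Array):
    alldata.obsm["X_pca"] = rep.compute()

rsc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
This would of course pull the full PCA matrix onto a single GPU, which defeats part of the purpose of the Dask setup.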
Then I tried sc.pp.neighbors (followed by rsc.tl.umap), and that also failed:
sc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
            n_components=2, maxiter=None, alpha=1.0,
            gamma=1.0, negative_sample_rate=5, init_pos='spectral',
            random_state=0)
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[13], line 1
----> 1 sc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')
2 rsc.tl.umap(alldata, min_dist=0.3, spread=1.0,
3 n_components=2, maxiter=None, alpha=1.0,
4 gamma=1.0, negative_sample_rate=5, init_pos='spectral',
5 random_state=0)
6 sc.pl.umap(alldata)
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/scanpy/neighbors/__init__.py:194, in neighbors(adata, n_neighbors, n_pcs, use_rep, knn, method, transformer, metric, metric_kwds, random_state, key_added, copy)
192 adata._init_as_actual(adata.copy())
193 neighbors = Neighbors(adata)
--> 194 neighbors.compute_neighbors(
195 n_neighbors,
196 n_pcs=n_pcs,
197 use_rep=use_rep,
198 knn=knn,
199 method=method,
200 transformer=transformer,
201 metric=metric,
202 metric_kwds=metric_kwds,
203 random_state=random_state,
204 )
206 if key_added is None:
207 key_added = "neighbors"
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/scanpy/neighbors/__init__.py:587, in Neighbors.compute_neighbors(self, n_neighbors, n_pcs, use_rep, knn, method, transformer, metric, metric_kwds, random_state)
585 self.knn = knn
586 X = _choose_representation(self._adata, use_rep=use_rep, n_pcs=n_pcs)
--> 587 self._distances = transformer.fit_transform(X)
588 knn_indices, knn_distances = _get_indices_distances_from_sparse_matrix(
589 self._distances, n_neighbors
590 )
591 if shortcut:
592 # self._distances is a sparse matrix with a diag of 1, fix that
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:2256, in PyNNDescentTransformer.fit_transform(self, X, y, **fit_params)
2236 def fit_transform(self, X, y=None, **fit_params):
2237 """Fit to graph_data, then transform it.
2238
2239 Fits transformer to X and y with optional parameters fit_params
(...) 2254 The diagonal is always explicit.
2255 """
-> 2256 self.fit(X, compress_index=False)
2257 result = self.transform(X=None)
2259 if self.verbose:
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:2174, in PyNNDescentTransformer.fit(self, X, compress_index)
2170 # Compatibility with sklearn, which doesn't consider
2171 # a point its own neighbor for these purposes.
2172 effective_n_neighbors = self.n_neighbors + 1
-> 2174 self.index_ = NNDescent(
2175 X,
2176 metric=self.metric,
2177 metric_kwds=metric_kwds,
2178 n_neighbors=effective_n_neighbors,
2179 n_trees=self.n_trees,
2180 leaf_size=self.leaf_size,
2181 pruning_degree_multiplier=self.pruning_degree_multiplier,
2182 diversify_prob=self.diversify_prob,
2183 n_search_trees=self.n_search_trees,
2184 tree_init=self.tree_init,
2185 random_state=self.random_state,
2186 low_memory=self.low_memory,
2187 max_candidates=self.max_candidates,
2188 n_iters=self.n_iters,
2189 delta=self.early_termination_value,
2190 n_jobs=self.n_jobs,
2191 compressed=compress_index,
2192 parallel_batch_queries=self.parallel_batch_queries,
2193 verbose=self.verbose,
2194 )
2196 return self
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/pynndescent/pynndescent_.py:736, in NNDescent.__init__(self, data, metric, metric_kwds, n_neighbors, n_trees, leaf_size, pruning_degree_multiplier, diversify_prob, n_search_trees, tree_init, init_graph, init_dist, random_state, low_memory, max_candidates, max_rptree_depth, n_iters, delta, n_jobs, compressed, parallel_batch_queries, verbose)
734 self._input_dtype = np.uint8
735 else:
--> 736 data = check_array(data, dtype=np.float32, accept_sparse="csr", order="C")
737 self._input_dtype = np.float32
739 self._raw_data = data
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/validation.py:1053, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
1051 array = xp.astype(array, dtype, copy=False)
1052 else:
-> 1053 array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
1054 except ComplexWarning as complex_warning:
1055 raise ValueError(
1056 "Complex data not supported\n{}\n".format(array)
1057 ) from complex_warning
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/sklearn/utils/_array_api.py:757, in _asarray_with_order(array, dtype, order, copy, xp, device)
755 array = numpy.array(array, order=order, dtype=dtype)
756 else:
--> 757 array = numpy.asarray(array, order=order, dtype=dtype)
759 # At this point array is a NumPy ndarray. We convert it to an array
760 # container that is consistent with the input's namespace.
761 return xp.asarray(array)
File ~/Code/Jupyter/.pixi/envs/default/lib/python3.12/site-packages/dask/array/core.py:1737, in Array.__array__(self, dtype, copy, **kwargs)
1729 x = self.compute()
1731 # Apply requested dtype and convert non-numpy backends to numpy.
1732 # If copy is True, numpy is going to perform its own deep copy
1733 # after this method returns.
1734 # If copy is None, finalize() ensures that the returned object
1735 # does not share memory with an object stored in the graph or on a
1736 # process-local Worker.
-> 1737 return np.asarray(x, dtype=dtype)
File cupy/_core/core.pyx:1581, in cupy._core.core._ndarray_base.__array__()
TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly
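As far as I can tell, this second failure happens because pynndescent's check_array calls np.asarray on the Dask array, whose chunks are CuPy arrays, and CuPy refuses the implicit device-to-host conversion. A possible (untested) workaround sketch would be to convert the representation back to a host NumPy array explicitly before running the CPU neighbors:
import cupy as cp
import dask.array as da

# untested sketch: bring the PCA representation back to host memory so that
# scanpy / pynndescent's check_array can handle it
rep = alldata.obsm["X_pca"]
if isinstance(rep, da.Array):
    # convert each CuPy chunk to NumPy, then gather into one host array
    alldata.obsm["X_pca"] = rep.map_blocks(cp.asnumpy).compute()
elif isinstance(rep, cp.ndarray):
    alldata.obsm["X_pca"] = rep.get()

sc.pp.neighbors(alldata, n_neighbors=250, n_pcs=15, metric='cosine')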
I referred to the docs:
https://rapids-singlecell.readthedocs.io/en/latest/notebooks/06-multi_gpu_show.html
https://rapids-singlecell.readthedocs.io/en/latest/notebooks/05_out-of-core.html
but both of them stop after running PCA. I think this happens because I use a dask.array, which may not be supported by sc.pp.neighbors, and I don't know how to work around it.
Are there any solutions or suggestions, or any available examples for the following steps that I can follow?