-
Notifications
You must be signed in to change notification settings - Fork 545
Open
Labels
bugSomething isn't workingSomething isn't working
Description
It runs well on an A100, but when I run it on a B200, it throws the exception below.
I searched for this error and, although I am not sure, it looks like Cellpose's GPU acceleration module has not been updated to support the Blackwell architecture.
Is it possible that this will be supported in the future?
AcceleratorError Traceback (most recent call last)
Cell In[6], line 1
----> 1 img,final_mask = run_segmentation_pipeline(
2 image_path='[/data2/core-med1/public/Spatial_project/Xenium/tif/fullimage/AlphaSMA_Vimentin.tif](http://127.0.0.1:8891/lab/tree/project/xenium/code/public/Spatial_project/Xenium/tif/fullimage/AlphaSMA_Vimentin.tif)',
3 model_path='[/data2/core-med1/public/Spatial_project/Xenium/cellpose/Models/X4.2](http://127.0.0.1:8891/lab/tree/project/xenium/code/public/Spatial_project/Xenium/cellpose/Models/X4.2)',
4 patch_seg = True,
5 patch_size=20000,
6 patch_overlap=300,
7 flow_threshold=0.5,
8 cellprob_threshold=0,
9 tile_norm_blocksize=0,
10 selected_channels=[0],
11 patch_masks_save_path='./result/patch_masks_mem_v3.npz',
12 final_mask_save_path='./result/final_mask_mem_v3.tif'
13 )
15 # 2h 8m 48s
Cell In[3], line 274, in run_segmentation_pipeline(image_path, model_path, patch_seg, patch_size, patch_overlap, selected_channels, flow_threshold, cellprob_threshold, tile_norm_blocksize, patch_masks_save_path, final_mask_save_path)
272 pbar = tqdm(patches, desc="Infer patches", total=len(patches))
273 for patch in pbar:
--> 274 m, _, _ = model.eval(
275 patch,
276 batch_size=8,
277 flow_threshold=FLOW_THRESHOLD,
278 cellprob_threshold=CELLLPROB_THRESHOLD,
279 normalize={'tile_norm_blocksize': TILE_NORM_BLOCKSIZE}
280 )
281 masks.append(m.astype(np.uint32, copy=False))
282 pbar.update(1)
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/models.py:338](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/models.py#line=337), in CellposeModel.eval(self, x, batch_size, resample, channels, channel_axis, z_axis, normalize, invert, rescale, diameter, flow_threshold, cellprob_threshold, do_3D, anisotropy, flow3D_smooth, stitch_threshold, min_size, max_size_fraction, niter, augment, tile_overlap, bsize, compute_masks, progress)
336 niter_scale = 1 if image_scaling is None else image_scaling
337 niter = int(200/niter_scale) if niter is None or niter == 0 else niter
--> 338 masks = self._compute_masks(x.shape, dP, cellprob, flow_threshold=flow_threshold,
339 cellprob_threshold=cellprob_threshold, min_size=min_size,
340 max_size_fraction=max_size_fraction, niter=niter,
341 stitch_threshold=stitch_threshold, do_3D=do_3D)
342 else:
343 masks = np.zeros(0) #pass back zeros if not compute_masks
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/models.py:524](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/models.py#line=523), in CellposeModel._compute_masks(self, shape, dP, cellprob, flow_threshold, cellprob_threshold, min_size, max_size_fraction, niter, do_3D, stitch_threshold)
521 for i in iterator:
522 # turn off min_size for 3D stitching
523 min_size0 = min_size if stitch_threshold == 0 or nimg == 1 else -1
--> 524 outputs = dynamics.resize_and_compute_masks(
525 dP[:, i], cellprob[i],
526 niter=niter, cellprob_threshold=cellprob_threshold,
527 flow_threshold=flow_threshold, resize=resize,
528 min_size=min_size0, max_size_fraction=max_size_fraction,
529 device=self.device)
530 if i==0 and nimg > 1:
531 masks = np.zeros((nimg, shape[1], shape[2]), outputs.dtype)
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:610](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=609), in resize_and_compute_masks(dP, cellprob, niter, cellprob_threshold, flow_threshold, do_3D, min_size, max_size_fraction, resize, device)
587 def resize_and_compute_masks(dP, cellprob, niter=200, cellprob_threshold=0.0,
588 flow_threshold=0.4, do_3D=False, min_size=15,
589 max_size_fraction=0.4, resize=None, device=torch.device("cpu")):
590 """Compute masks using dynamics from dP and cellprob, and resizes masks if resize is not None.
591
592 Args:
(...)
608 tuple: A tuple containing the computed masks and the final pixel locations.
609 """
--> 610 mask = compute_masks(dP, cellprob, niter=niter,
611 cellprob_threshold=cellprob_threshold,
612 flow_threshold=flow_threshold, do_3D=do_3D,
613 max_size_fraction=max_size_fraction,
614 device=device)
616 if resize is not None:
617 dynamics_logger.warning("Resizing is depricated in v4.0.1+")
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:672](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=671), in compute_masks(dP, cellprob, p, niter, cellprob_threshold, flow_threshold, do_3D, min_size, max_size_fraction, device)
669 if not do_3D:
670 if mask.max() > 0 and flow_threshold is not None and flow_threshold > 0:
671 # make sure labels are unique at output of get_masks
--> 672 mask = remove_bad_flow_masks(mask, dP, threshold=flow_threshold,
673 device=device)
675 if mask.max() < 2**16 and mask.dtype != "uint16":
676 mask = mask.astype("uint16")
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:443](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=442), in remove_bad_flow_masks(masks, flows, threshold, device)
440 dynamics_logger.info("turn off QC step with flow_threshold=0 if too slow")
441 device0 = torch.device("cpu")
--> 443 merrors, _ = flow_error(masks, flows, device0)
444 badi = 1 + (merrors > threshold).nonzero()[0]
445 masks[np.isin(masks, badi)] = 0
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:300](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=299), in flow_error(maski, dP_net, device)
297 return
299 # flows predicted from estimated masks
--> 300 dP_masks = masks_to_flows_gpu(maski, device=device)
301 # difference between predicted flows vs mask flows
302 flow_errors = np.zeros(maski.max())
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:139](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=138), in masks_to_flows_gpu(masks, device, niter)
137 ### run diffusion
138 n_iter = 2 * ext.max() if niter is None else niter
--> 139 mu = _extend_centers_gpu(neighbors, meds_p, isneighbor, shape, n_iter=n_iter,
140 device=device)
141 mu = mu.astype("float64")
143 # new normalization
File [/data2/core-med1/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py:50](http://127.0.0.1:8891/lab/tree/project/xenium/code/miniconda/envs/cellpose_b200/lib/python3.10/site-packages/cellpose/dynamics.py#line=49), in _extend_centers_gpu(***failed resolving arguments***)
47 del meds, isneighbor, Tneigh
49 if T.ndim == 2:
---> 50 grads = T[neighbors[0, [2, 1, 4, 3]], neighbors[1, [2, 1, 4, 3]]]
51 del neighbors
52 dy = grads[0] - grads[1]
AcceleratorError: CUDA error: invalid configuration argument
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
Here is my PyTorch version:
import torch
print(torch.__version__)
print(torch.cuda.get_device_name(0))
print(torch.version.cuda)
2.8.0+cu128
NVIDIA B200
12.8
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working