We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b33860f commit 5ddc143 (Copy full SHA for 5ddc143)
1 file changed
tmp_scripts/utils/sepal_gpu.py
@@ -162,10 +162,9 @@ def _cuda_kernel_diffusion_gpu(
162
results_all = cp.full(n_genes, -999999.0, dtype=cp.float64) # Results for ALL genes
163
164
# Calculate shared memory (fixed size per block, independent of n_cells)
165
- tile_size = 1024 # Fixed tile size for scalability
166
min_blocks = 256 # Hardware-specific minimum
167
blocks_per_grid = max(n_genes, min_blocks)
168
- shared_mem_size = tile_size * 2 * 8 # 2 double arrays per tile
+ shared_mem_size = threads_per_block * 2 * 8 # 2 double arrays per thread
169
170
171
# **SINGLE KERNEL LAUNCH FOR ALL GENES**
0 commit comments