@@ -203,14 +203,14 @@ __global__ void erase(InputIt first,
203203
204204 std::size_t thread_num_successes = 0 ;
205205
206- auto tid = block_size * blockIdx .x + threadIdx .x ;
207- auto it = first + tid;
206+ auto tid = block_size * blockIdx .x + threadIdx .x ;
207+ auto it = first + tid;
208208
209- if (num_submaps > 1 ) {
210- for (int i = threadIdx .x ; i < num_submaps; i += block_size)
209+ if (num_submaps > 1 ) {
210+ for (int i = threadIdx .x ; i < num_submaps; i += block_size)
211211 submap_block_num_successes[i] = 0 ;
212212 __syncthreads ();
213-
213+
214214 while (it < last) {
215215 int i;
216216 for (i = 0 ; i < num_submaps; ++i) {
@@ -224,8 +224,7 @@ __global__ void erase(InputIt first,
224224 }
225225 } else {
226226 while (it < last) {
227- if (submap_mutable_views[0 ].erase (*it, hash, key_equal))
228- thread_num_successes++;
227+ if (submap_mutable_views[0 ].erase (*it, hash, key_equal)) thread_num_successes++;
229228 it += gridDim .x * blockDim .x ;
230229 }
231230 }
@@ -235,11 +234,11 @@ __global__ void erase(InputIt first,
235234 num_successes->fetch_add (block_num_successes, cuda::std::memory_order_relaxed);
236235 }
237236
238- if (num_submaps > 1 ) {
239- for (int i = 0 ; i < num_submaps; ++i) {
240- if (threadIdx .x == 0 ) {
241- submap_num_successes[i]->fetch_add (
242- static_cast <std:: size_t >(submap_block_num_successes[i]), cuda::std::memory_order_relaxed);
237+ if (num_submaps > 1 ) {
238+ for (int i = 0 ; i < num_submaps; ++i) {
239+ if (threadIdx .x == 0 ) {
240+ submap_num_successes[i]->fetch_add (static_cast <std:: size_t >(submap_block_num_successes[i]),
241+ cuda::std::memory_order_relaxed);
243242 }
244243 }
245244 }
@@ -255,14 +254,14 @@ template <uint32_t block_size,
255254 typename Hash,
256255 typename KeyEqual>
257256__global__ void erase (InputIt first,
258- InputIt last,
259- viewT* submap_views,
260- mutableViewT* submap_mutable_views,
261- atomicT* num_successes,
262- atomicT** submap_num_successes,
263- const uint32_t num_submaps,
264- Hash hash,
265- KeyEqual key_equal)
257+ InputIt last,
258+ viewT* submap_views,
259+ mutableViewT* submap_mutable_views,
260+ atomicT* num_successes,
261+ atomicT** submap_num_successes,
262+ const uint32_t num_submaps,
263+ Hash hash,
264+ KeyEqual key_equal)
266265{
267266 typedef cub::BlockReduce<std::size_t , block_size> BlockReduce;
268267 __shared__ typename BlockReduce::TempStorage temp_storage;
@@ -274,13 +273,13 @@ __global__ void erase(InputIt first,
274273 auto tid = block_size * blockIdx .x + threadIdx .x ;
275274 auto it = first + tid / tile_size;
276275
277- if (num_submaps > 1 ) {
278- for (int i = threadIdx .x ; i < num_submaps; i += block_size)
276+ if (num_submaps > 1 ) {
277+ for (int i = threadIdx .x ; i < num_submaps; i += block_size)
279278 submap_block_num_successes[i] = 0 ;
280279 __syncthreads ();
281-
280+
282281 while (it < last) {
283- auto erased = false ;
282+ auto erased = false ;
284283 int i;
285284 for (i = 0 ; i < num_submaps; ++i) {
286285 erased = submap_mutable_views[i].erase (tile, *it, hash, key_equal);
@@ -295,8 +294,7 @@ __global__ void erase(InputIt first,
295294 } else {
296295 while (it < last) {
297296 auto erased = submap_mutable_views[0 ].erase (tile, *it, hash, key_equal);
298- if (erased && tile.thread_rank () == 0 )
299- thread_num_successes++;
297+ if (erased && tile.thread_rank () == 0 ) thread_num_successes++;
300298
301299 it += (gridDim .x * blockDim .x ) / tile_size;
302300 }
@@ -307,11 +305,11 @@ __global__ void erase(InputIt first,
307305 num_successes->fetch_add (block_num_successes, cuda::std::memory_order_relaxed);
308306 }
309307
310- if (num_submaps > 1 ) {
311- for (int i = 0 ; i < num_submaps; ++i) {
312- if (threadIdx .x == 0 ) {
313- submap_num_successes[i]->fetch_add (
314- static_cast <std:: size_t >(submap_block_num_successes[i]), cuda::std::memory_order_relaxed);
308+ if (num_submaps > 1 ) {
309+ for (int i = 0 ; i < num_submaps; ++i) {
310+ if (threadIdx .x == 0 ) {
311+ submap_num_successes[i]->fetch_add (static_cast <std:: size_t >(submap_block_num_successes[i]),
312+ cuda::std::memory_order_relaxed);
315313 }
316314 }
317315 }
0 commit comments