@@ -53,11 +53,11 @@ fn attempt_lookback(
53
53
){
54
54
let payload : u32 = atomicLoad (& reduced [lookback_ix ][member_ix ]);
55
55
let flag_value : u32 = payload & FLAG_MASK;
56
- if ( flag_value == FLAG_REDUCTION) {
56
+ if flag_value == FLAG_REDUCTION {
57
57
*spin_count = 0u ;
58
58
*prev += payload >> 2u ;
59
59
*reduction_complete = true ;
60
- } else if ( flag_value == FLAG_INCLUSIVE) {
60
+ } else if flag_value == FLAG_INCLUSIVE {
61
61
*spin_count = 0u ;
62
62
*prev += payload >> 2u ;
63
63
atomicStore (& reduced [part_ix ][member_ix ], ((aggregate + *prev ) << 2u ) | FLAG_INCLUSIVE);
@@ -77,12 +77,12 @@ fn fallback(
77
77
){
78
78
let fallback_payload = (fallback_aggregate << 2u ) | select (FLAG_INCLUSIVE, FLAG_REDUCTION, fallback_ix != 0u );
79
79
let prev_payload = atomicMax (& reduced [fallback_ix ][member_ix ], fallback_payload );
80
- if ( prev_payload == 0u ) {
80
+ if prev_payload == 0u {
81
81
*prev += fallback_aggregate ;
82
82
} else {
83
83
*prev += prev_payload >> 2u ;
84
84
}
85
- if ( fallback_ix == 0u || (prev_payload & FLAG_MASK) == FLAG_INCLUSIVE) {
85
+ if fallback_ix == 0u || (prev_payload & FLAG_MASK) == FLAG_INCLUSIVE {
86
86
atomicStore (& reduced [part_ix ][member_ix ], ((aggregate + *prev ) << 2u ) | FLAG_INCLUSIVE);
87
87
sh_tag_broadcast [member_ix ] = *prev ;
88
88
*inclusive_complete = true ;
@@ -94,8 +94,8 @@ fn main(
94
94
@builtin (local_invocation_id ) local_id : vec3 <u32 >,
95
95
) {
96
96
//acquire the partition index, set the lock
97
- if ( local_id . x == 0u ) {
98
- sh_broadcast = atomicAdd (& scan_bump [0u ], 1u );
97
+ if local_id . x == 0u {
98
+ sh_broadcast = atomicAdd (& scan_bump [0 ], 1u );
99
99
sh_lock = LOCKED ;
100
100
}
101
101
workgroupBarrier ();
@@ -121,7 +121,7 @@ fn main(
121
121
122
122
//Broadcast the results and flag into device memory
123
123
if local_id . x == WG_SIZE - 1u {
124
- if ( part_ix != 0u ) {
124
+ if part_ix != 0u {
125
125
atomicStore (& reduced [part_ix ][0 ], (agg [0 ] << 2u ) | FLAG_REDUCTION);
126
126
atomicStore (& reduced [part_ix ][1 ], (agg [1 ] << 2u ) | FLAG_REDUCTION);
127
127
atomicStore (& reduced [part_ix ][2 ], (agg [2 ] << 2u ) | FLAG_REDUCTION);
@@ -137,7 +137,7 @@ fn main(
137
137
}
138
138
139
139
//Lookback and potentially fallback
140
- if ( part_ix != 0u ) {
140
+ if part_ix != 0u {
141
141
var lookback_ix = part_ix - 1u ;
142
142
143
143
var inc0 : bool = false ;
@@ -163,76 +163,76 @@ fn main(
163
163
164
164
//Lookback, with a single thread
165
165
//Last thread in the workgroup has the complete aggregate
166
- if ( local_id . x == WG_SIZE - 1u ) {
167
- for (var spin_count : u32 = 0u ; spin_count < MAX_SPIN_COUNT; ){
166
+ if local_id . x == WG_SIZE - 1u {
167
+ for (var spin_count : u32 = 0u ; spin_count < MAX_SPIN_COUNT; ) {
168
168
//TRANS_IX
169
- if ( ! inc0 && ! red0 ) {
169
+ if ! inc0 && ! red0 {
170
170
attempt_lookback (
171
171
part_ix ,
172
172
lookback_ix ,
173
173
0u ,
174
- agg [0u ],
174
+ agg [0 ],
175
175
& spin_count ,
176
176
& prev0 ,
177
177
& red0 ,
178
178
& inc0 );
179
179
}
180
180
181
181
//PATHSEG_IX
182
- if ( ! inc1 && ! red1 ) {
182
+ if ! inc1 && ! red1 {
183
183
attempt_lookback (
184
184
part_ix ,
185
185
lookback_ix ,
186
186
1u ,
187
- agg [1u ],
187
+ agg [1 ],
188
188
& spin_count ,
189
189
& prev1 ,
190
190
& red1 ,
191
191
& inc1 );
192
192
}
193
193
194
194
//PATHSEG_OFFSET
195
- if ( ! inc2 && ! red2 ) {
195
+ if ! inc2 && ! red2 {
196
196
attempt_lookback (
197
197
part_ix ,
198
198
lookback_ix ,
199
199
2u ,
200
- agg [2u ],
200
+ agg [2 ],
201
201
& spin_count ,
202
202
& prev2 ,
203
203
& red2 ,
204
204
& inc2 );
205
205
}
206
206
207
207
//STYLE_IX
208
- if ( ! inc3 && ! red3 ) {
208
+ if ! inc3 && ! red3 {
209
209
attempt_lookback (
210
210
part_ix ,
211
211
lookback_ix ,
212
212
3u ,
213
- agg [3u ],
213
+ agg [3 ],
214
214
& spin_count ,
215
215
& prev3 ,
216
216
& red3 ,
217
217
& inc3 );
218
218
}
219
219
220
220
//PATH_IX
221
- if ( ! inc4 && ! red4 ) {
221
+ if ! inc4 && ! red4 {
222
222
attempt_lookback (
223
223
part_ix ,
224
224
lookback_ix ,
225
225
4u ,
226
- agg [4u ],
226
+ agg [4 ],
227
227
& spin_count ,
228
228
& prev4 ,
229
229
& red4 ,
230
230
& inc4 );
231
231
}
232
232
233
233
//Have we completed the current reduction or inclusive sum for all PathTag members?
234
- if (( inc0 || red0 ) && (inc1 || red1 ) && (inc2 || red2 ) && (inc3 || red3 ) && (inc4 || red4 )) {
235
- if ( inc0 && inc1 && inc2 && inc3 && inc4 ) {
234
+ if ( inc0 || red0 ) && (inc1 || red1 ) && (inc2 || red2 ) && (inc3 || red3 ) && (inc4 || red4 ) {
235
+ if inc0 && inc1 && inc2 && inc3 && inc4 {
236
236
sh_lock = UNLOCKED ;
237
237
break ;
238
238
} else {
@@ -251,7 +251,7 @@ fn main(
251
251
//If we didn't complete the lookback within the allotted spins,
252
252
//prepare for the fallback by broadcasting the lookback tile id
253
253
//and states of the tagmonoid struct members
254
- if ( sh_lock == LOCKED ) {
254
+ if sh_lock == LOCKED {
255
255
sh_broadcast = lookback_ix ;
256
256
sh_fallback_state [0 ] = ! inc0 && ! red0 ;
257
257
sh_fallback_state [1 ] = ! inc1 && ! red1 ;
@@ -263,7 +263,7 @@ fn main(
263
263
workgroupBarrier ();
264
264
265
265
//Fallback
266
- if ( sh_lock == LOCKED ) {
266
+ if sh_lock == LOCKED {
267
267
let fallback_ix = sh_broadcast ;
268
268
269
269
red0 = sh_fallback_state [0 ];
@@ -282,114 +282,114 @@ fn main(
282
282
workgroupBarrier ();
283
283
if local_id . x + (1u << i ) < WG_SIZE {
284
284
let index = local_id . x + (1u << i );
285
- if ( red0 ) {
285
+ if red0 {
286
286
f_agg [0 ] += sh_fallback [index ][0 ];
287
287
}
288
- if ( red1 ) {
288
+ if red1 {
289
289
f_agg [1 ] += sh_fallback [index ][1 ];
290
290
}
291
- if ( red2 ) {
291
+ if red2 {
292
292
f_agg [2 ] += sh_fallback [index ][2 ];
293
293
}
294
- if ( red3 ) {
294
+ if red3 {
295
295
f_agg [3 ] += sh_fallback [index ][3 ];
296
296
}
297
- if ( red4 ) {
297
+ if red4 {
298
298
f_agg [4 ] += sh_fallback [index ][4 ];
299
299
}
300
300
}
301
301
workgroupBarrier ();
302
- if ( red0 ) {
302
+ if red0 {
303
303
sh_fallback [local_id . x][0 ] = f_agg [0 ];
304
304
}
305
305
306
- if ( red1 ) {
306
+ if red1 {
307
307
sh_fallback [local_id . x][1 ] = f_agg [1 ];
308
308
}
309
309
310
- if ( red2 ) {
310
+ if red2 {
311
311
sh_fallback [local_id . x][2 ] = f_agg [2 ];
312
312
}
313
313
314
- if ( red3 ) {
314
+ if red3 {
315
315
sh_fallback [local_id . x][3 ] = f_agg [3 ];
316
316
}
317
317
318
- if ( red4 ) {
318
+ if red4 {
319
319
sh_fallback [local_id . x][4 ] = f_agg [4 ];
320
320
}
321
321
}
322
322
323
323
//Fallback and attempt insertion of status flag
324
- if ( local_id . x == WG_SIZE - 1u ) {
324
+ if local_id . x == WG_SIZE - 1u {
325
325
//TRANS_IX FALLBACK
326
- if ( red0 ) {
326
+ if red0 {
327
327
fallback (
328
328
part_ix ,
329
329
fallback_ix ,
330
330
0u ,
331
- agg [0u ],
332
- f_agg [0u ],
331
+ agg [0 ],
332
+ f_agg [0 ],
333
333
& prev0 ,
334
334
& inc0 ,
335
335
);
336
336
}
337
337
338
338
//PATHSEG_IX FALLBACK
339
- if ( red1 ) {
339
+ if red1 {
340
340
fallback (
341
341
part_ix ,
342
342
fallback_ix ,
343
343
1u ,
344
- agg [1u ],
345
- f_agg [1u ],
344
+ agg [1 ],
345
+ f_agg [1 ],
346
346
& prev1 ,
347
347
& inc1 ,
348
348
);
349
349
}
350
350
351
351
//PATHSEG_OFFSET FALLBACK
352
- if ( red2 ) {
352
+ if red2 {
353
353
fallback (
354
354
part_ix ,
355
355
fallback_ix ,
356
356
2u ,
357
- agg [2u ],
358
- f_agg [2u ],
357
+ agg [2 ],
358
+ f_agg [2 ],
359
359
& prev2 ,
360
360
& inc2 ,
361
361
);
362
362
}
363
363
364
364
//STYLE_IX FALLBACK
365
- if ( red3 ) {
365
+ if red3 {
366
366
fallback (
367
367
part_ix ,
368
368
fallback_ix ,
369
369
3u ,
370
- agg [3u ],
371
- f_agg [3u ],
370
+ agg [3 ],
371
+ f_agg [3 ],
372
372
& prev3 ,
373
373
& inc3 ,
374
374
);
375
375
}
376
376
377
377
//PATH_IX FALLBACK
378
- if ( red4 ) {
378
+ if red4 {
379
379
fallback (
380
380
part_ix ,
381
381
fallback_ix ,
382
382
4u ,
383
- agg [4u ],
384
- f_agg [4u ],
383
+ agg [4 ],
384
+ f_agg [4 ],
385
385
& prev4 ,
386
386
& inc4 ,
387
387
);
388
388
}
389
389
390
390
//At this point, the reductions are guaranteed to be complete,
391
391
//so try unlocking, else, keep looking back
392
- if ( inc0 && inc1 && inc2 && inc3 && inc4 ) {
392
+ if inc0 && inc1 && inc2 && inc3 && inc4 {
393
393
sh_lock = UNLOCKED ;
394
394
} else {
395
395
lookback_ix -- ;
@@ -402,7 +402,7 @@ fn main(
402
402
workgroupBarrier ();
403
403
404
404
var tm : array <u32 , 5 >;
405
- if ( part_ix != 0u ) {
405
+ if part_ix != 0u {
406
406
tm = sh_tag_broadcast ;
407
407
} else {
408
408
tm [0 ] = 0u ;
@@ -412,7 +412,7 @@ fn main(
412
412
tm [4 ] = 0u ;
413
413
}
414
414
415
- if ( local_id . x != 0u ) {
415
+ if local_id . x != 0u {
416
416
let other : array <u32 , 5 > = sh_scratch [local_id . x - 1u ];
417
417
tm [0 ] += other [0 ];
418
418
tm [1 ] += other [1 ];
0 commit comments