Skip to content

Commit 9724e6d

Browse files
committed
properly handle ELEMENTS_PER_THREAD
Signed-off-by: Andrew Duffy <[email protected]>
1 parent cbe6c2b commit 9724e6d

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

vortex-cuda/kernels/src/sequence.cu

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "config.cuh"
45
#include <stdint.h>
56

7+
// Smaller of two values. This is a function-like macro, so each argument may be
// evaluated twice: do not pass expressions with side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
8+
69
// Writes output[i] = i * multiplier + base for the indices owned by the calling
// thread.
//
// Each thread (identified by its flat 1D index across the grid) handles a
// contiguous run of ELEMENTS_PER_THREAD indices, clamped to `len`. A thread
// whose run starts at or beyond `len` writes nothing.
//
// output:     destination buffer; indices in [0, len) are written across the grid.
// base:       value added to every element.
// multiplier: factor applied to the element index.
// len:        number of elements to generate.
//
// NOTE(review): ELEMENTS_PER_THREAD is not defined in this file; presumably it
// comes from config.cuh — confirm, and confirm the launch provides at least
// ceil(len / ELEMENTS_PER_THREAD) threads.
template<typename ValueT>
__device__ void sequence(
    ValueT *const output,
    ValueT base,
    ValueT multiplier,
    uint64_t len
) {
    // Widen before multiplying: blockIdx.x * blockDim.x would otherwise be
    // computed in 32-bit unsigned arithmetic and could wrap on very large grids.
    const uint64_t worker = static_cast<uint64_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    // Clamp both ends of this thread's run to len so the tail thread stops
    // early and surplus threads get an empty range.
    const uint64_t rawStart = worker * ELEMENTS_PER_THREAD;
    const uint64_t elemStart = rawStart < len ? rawStart : len;
    const uint64_t rawEnd = elemStart + ELEMENTS_PER_THREAD;
    const uint64_t elemEnd = rawEnd < len ? rawEnd : len;

    for (uint64_t idx = elemStart; idx < elemEnd; idx++) {
        output[idx] = static_cast<ValueT>(idx) * multiplier + base;
    }
}
2025

2126
#define GENERATE_KERNEL(ValueT, suffix) \

0 commit comments

Comments
 (0)