Skip to content

Commit 32caa1c

Browse files
authored
[ROCm] fixes ambiguous calls to shfl* where there is no explicit type (#411)
conversion from c10::Half to __half
1 parent 140d3ad commit 32caa1c

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

csrc/cuda/utils.cuh

+10
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
1818
return __shfl_down_sync(mask, var.operator __half(), delta);
1919
}
2020

21+
// at::Half overload of the mask-less __shfl_up.
//
// Converts `var` to __half through its conversion operator, forwards it
// together with `delta` to the __shfl_up overload selected for a __half
// argument, and returns that call's result as at::Half.
//
// NOTE(review): this definition is not inside the USE_ROCM guard; the
// mask-less shuffle intrinsics are legacy on recent CUDA architectures --
// confirm this overload is only reached where a __half __shfl_up exists.
__device__ __inline__ at::Half __shfl_up(const at::Half var,
                                         const unsigned int delta) {
  // Make the __half conversion explicit so the call below is not ambiguous.
  const __half raw = var.operator __half();
  return __shfl_up(raw, delta);
}
25+
26+
// at::Half overload of the mask-less __shfl_down.
//
// Converts `var` to __half through its conversion operator, forwards it
// together with `delta` to the __shfl_down overload selected for a __half
// argument, and returns that call's result as at::Half.
//
// NOTE(review): this definition is not inside the USE_ROCM guard; the
// mask-less shuffle intrinsics are legacy on recent CUDA architectures --
// confirm this overload is only reached where a __half __shfl_down exists.
__device__ __inline__ at::Half __shfl_down(const at::Half var,
                                           const unsigned int delta) {
  // Make the __half conversion explicit so the call below is not ambiguous.
  const __half raw = var.operator __half();
  return __shfl_down(raw, delta);
}
30+
2131
#ifdef USE_ROCM
2232
__device__ __inline__ at::Half __ldg(const at::Half* ptr) {
2333
return __ldg(reinterpret_cast<const __half*>(ptr));

0 commit comments

Comments
 (0)