We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 140d3ad commit 32caa1cCopy full SHA for 32caa1c
csrc/cuda/utils.cuh
@@ -18,6 +18,16 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
18
return __shfl_down_sync(mask, var.operator __half(), delta);
19
}
20
21
+__device__ __inline__ at::Half __shfl_up(const at::Half var,
22
+ const unsigned int delta) {
23
+ return __shfl_up(var.operator __half(), delta);
24
+}
25
+
26
+__device__ __inline__ at::Half __shfl_down(const at::Half var,
27
28
+ return __shfl_down(var.operator __half(), delta);
29
30
31
#ifdef USE_ROCM
32
__device__ __inline__ at::Half __ldg(const at::Half* ptr) {
33
return __ldg(reinterpret_cast<const __half*>(ptr));
0 commit comments