some changes necessary for bf16/fp16 updates in rocm (#2480)

benson31 · web-flow · commit a7bf7b9d8f04 · 2025-03-19T14:43:58.000-04:00
diff --git a/include/lbann/utils/impl/rocm.hpp b/include/lbann/utils/impl/rocm.hpp
@@ -31,6 +31,7 @@
 #include "hipcub/block/block_reduce.hpp"
 #endif // HYDROGEN_HAVE_CUB
 #include <hip/hip_fp16.h>
+#include <hip/hip_bf16.h>
 #include <limits>
 #endif // __HIPCC__
 
@@ -165,7 +166,7 @@ __device__ __forceinline__ T gpu_lib::block_reduce(T val)
 #define WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(func)                               \
   __device__ __forceinline__ __half gpu_lib::func(__half const& x)             \
   {                                                                            \
-    return ::h##func(x);                                                       \
+    return h##func(x);                                                         \
   }
 
 // FIXME (trb): This is maybe not the best long-term solution, but it
@@ -190,7 +191,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(exp)
 //  implementation could be:
 __device__ __forceinline__ __half gpu_lib::expm1(__half const& x)
 {
-  return ::__hsub(::hexp(x), ::__float2half(1.f));
+  return __hsub(hexp(x), __float2half(1.f));
 }
 
 WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(log)
@@ -204,7 +205,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(sin)
 //  accurate than a native implementation.
 __device__ __forceinline__ __half gpu_lib::tan(__half const& x)
 {
-  return ::__hdiv(::hsin(x), ::hcos(x));
+  return __hdiv(hsin(x), hcos(x));
 }
 
 WRAP_UNARY_ROCM_HALF_CAST_TO_FLOAT_MATH_FUNCTION(acos)
@@ -242,12 +243,12 @@ __device__ __forceinline__ bool gpu_lib::isfinite(__half const& x)
 // Binary math functions
 __device__ __forceinline__ __half gpu_lib::min(const __half& x, const __half& y)
 {
-  return ::__hle(x, y) ? x : y;
+  return __hle(x, y) ? x : y;
 }
 
 __device__ __forceinline__ __half gpu_lib::max(const __half& x, const __half& y)
 {
-  return ::__hle(x, y) ? y : x;
+  return __hle(x, y) ? y : x;
 }
 
 // Numeric limits

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@`
`31`	`31`	`#include "hipcub/block/block_reduce.hpp"`
`32`	`32`	`#endif // HYDROGEN_HAVE_CUB`
`33`	`33`	`#include <hip/hip_fp16.h>`
	`34`	`+#include <hip/hip_bf16.h>`
`34`	`35`	`#include <limits>`
`35`	`36`	`#endif // __HIPCC__`
`36`	`37`
`@@ -165,7 +166,7 @@ __device__ __forceinline__ T gpu_lib::block_reduce(T val)`
`165`	`166`	`#define WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(func) \`
`166`	`167`	`__device__ __forceinline__ __half gpu_lib::func(__half const& x) \`
`167`	`168`	`{ \`
`168`		`- return ::h##func(x); \`
	`169`	`+ return h##func(x); \`
`169`	`170`	`}`
`170`	`171`
`171`	`172`	`// FIXME (trb): This is maybe not the best long-term solution, but it`
`@@ -190,7 +191,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(exp)`
`190`	`191`	`// implementation could be:`
`191`	`192`	`__device__ __forceinline__ __half gpu_lib::expm1(__half const& x)`
`192`	`193`	`{`
`193`		`- return ::__hsub(::hexp(x), ::__float2half(1.f));`
	`194`	`+ return __hsub(hexp(x), __float2half(1.f));`
`194`	`195`	`}`
`195`	`196`
`196`	`197`	`WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(log)`
`@@ -204,7 +205,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(sin)`
`204`	`205`	`// accurate than a native implementation.`
`205`	`206`	`__device__ __forceinline__ __half gpu_lib::tan(__half const& x)`
`206`	`207`	`{`
`207`		`- return ::__hdiv(::hsin(x), ::hcos(x));`
	`208`	`+ return __hdiv(hsin(x), hcos(x));`
`208`	`209`	`}`
`209`	`210`
`210`	`211`	`WRAP_UNARY_ROCM_HALF_CAST_TO_FLOAT_MATH_FUNCTION(acos)`
`@@ -242,12 +243,12 @@ __device__ __forceinline__ bool gpu_lib::isfinite(__half const& x)`
`242`	`243`	`// Binary math functions`
`243`	`244`	`__device__ __forceinline__ __half gpu_lib::min(const __half& x, const __half& y)`
`244`	`245`	`{`
`245`		`- return ::__hle(x, y) ? x : y;`
	`246`	`+ return __hle(x, y) ? x : y;`
`246`	`247`	`}`
`247`	`248`
`248`	`249`	`__device__ __forceinline__ __half gpu_lib::max(const __half& x, const __half& y)`
`249`	`250`	`{`
`250`		`- return ::__hle(x, y) ? y : x;`
	`251`	`+ return __hle(x, y) ? y : x;`
`251`	`252`	`}`
`252`	`253`
`253`	`254`	`// Numeric limits`