diff --git a/include/lbann/utils/impl/rocm.hpp b/include/lbann/utils/impl/rocm.hpp
index 8eb42158f47..252128e7117 100644
--- a/include/lbann/utils/impl/rocm.hpp
+++ b/include/lbann/utils/impl/rocm.hpp
@@ -31,6 +31,7 @@
 #include "hipcub/block/block_reduce.hpp"
 #endif // HYDROGEN_HAVE_CUB
 #include <hip/hip_fp16.h>
+#include <hip/hip_bf16.h>
 #include <limits>
 #endif // __HIPCC__
 
@@ -165,7 +166,7 @@ __device__ __forceinline__ T gpu_lib::block_reduce(T val)
 #define WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(func)                               \
   __device__ __forceinline__ __half gpu_lib::func(__half const& x)             \
   {                                                                            \
-    return ::h##func(x);                                                       \
+    return h##func(x);                                                         \
   }
 
 // FIXME (trb): This is maybe not the best long-term solution, but it
@@ -190,7 +191,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(exp)
 //  implementation could be:
 __device__ __forceinline__ __half gpu_lib::expm1(__half const& x)
 {
-  return ::__hsub(::hexp(x), ::__float2half(1.f));
+  return __hsub(hexp(x), __float2half(1.f)); // exp(x) - 1 via half intrinsics
 }
 
 WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(log)
@@ -204,7 +205,7 @@ WRAP_UNARY_ROCM_HALF_MATH_FUNCTION(sin)
 //  accurate than a native implementation.
 __device__ __forceinline__ __half gpu_lib::tan(__half const& x)
 {
-  return ::__hdiv(::hsin(x), ::hcos(x));
+  return __hdiv(hsin(x), hcos(x)); // sin(x) / cos(x) via half intrinsics
 }
 
 WRAP_UNARY_ROCM_HALF_CAST_TO_FLOAT_MATH_FUNCTION(acos)
@@ -242,12 +243,12 @@ __device__ __forceinline__ bool gpu_lib::isfinite(__half const& x)
 // Binary math functions
 __device__ __forceinline__ __half gpu_lib::min(const __half& x, const __half& y)
 {
-  return ::__hle(x, y) ? x : y;
+  return __hle(x, y) ? x : y; // x when __hle(x, y) holds, else y
 }
 
 __device__ __forceinline__ __half gpu_lib::max(const __half& x, const __half& y)
 {
-  return ::__hle(x, y) ? y : x;
+  return __hle(x, y) ? y : x; // y when __hle(x, y) holds, else x
 }
 
 // Numeric limits