add bf16_alias.hpp

yhmtsai · yhmtsai · commit 4d1bfdf0b8e4 · 2025-04-16T13:51:11.000+02:00
diff --git a/common/cuda_hip/base/bf16_alias.hpp b/common/cuda_hip/base/bf16_alias.hpp
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: 2025 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_COMMON_CUDA_HIP_BASE_BF16_ALIAS_HPP_
+#define GKO_COMMON_CUDA_HIP_BASE_BF16_ALIAS_HPP_
+
+
+#ifdef GKO_COMPILING_CUDA
+
+
+#include <cuda_bf16.h>
+
+
+namespace gko {
+
+// Vendor bfloat16 alias for CUDA builds.
+using vendor_bf16 = __nv_bfloat16;
+
+
+}  // namespace gko
+
+
+#elif defined(GKO_COMPILING_HIP)
+
+
+#if HIP_VERSION >= 60200000
+// HIP has provided __hip_bfloat16 since ROCm 5.6.0, but the functionality we
+// need (conversions and operator overloads) is only available from ROCm 6.2.0,
+// which also adds more native operation support.
+#include <hip/hip_bf16.h>
+
+namespace gko {
+
+// Vendor bfloat16 alias for HIP builds with HIP_VERSION >= 60200000.
+using vendor_bf16 = __hip_bfloat16;
+
+
+}  // namespace gko
+
+
+#else
+
+
+// For older HIP versions we use hip_bfloat16, which only provides the type and
+// performs its operations via a fallback to single precision.
+#include <hip/hip_bfloat16.h>
+
+
+namespace gko {
+
+// Vendor bfloat16 alias for HIP builds with HIP_VERSION < 60200000.
+using vendor_bf16 = hip_bfloat16;
+
+
+}  // namespace gko
+
+
+#endif
+#endif
+#endif  // GKO_COMMON_CUDA_HIP_BASE_BF16_ALIAS_HPP_
diff --git a/common/cuda_hip/base/math.hpp b/common/cuda_hip/base/math.hpp
@@ -14,35 +14,18 @@
 #ifdef GKO_COMPILING_CUDA
 
 
-#include <cuda_bf16.h>
 #include <cuda_fp16.h>
 
-using vendor_bf16 = __nv_bfloat16;
-
 
 #elif defined(GKO_COMPILING_HIP)
 
 
-#if HIP_VERSION >= 60200000
-// HIP has __hip_bfloat16 after ROCM 5.6.0 but enough implementation for us
-// (conversion and operation overload) after ROCM 6.2.0 which provides more
-// native operations support.
-#include <hip/hip_bf16.h>
-using vendor_bf16 = __hip_bfloat16;
-#else
-// HIP has hip_bfloat16 but only the type with the operation fallback to the
-// single precision
-#include <hip/hip_bfloat16.h>
-using vendor_bf16 = hip_bfloat16;
-#endif
-
-
 #include <hip/hip_fp16.h>
 
 
 #endif
 
-
+#include "common/cuda_hip/base/bf16_alias.hpp"
 #include "common/cuda_hip/base/thrust_macro.hpp"
 
 
@@ -242,16 +225,16 @@ GKO_ATTRIBUTES GKO_INLINE __half abs<__half>(const complex<__half>& z)
 #if GINKGO_ENABLE_BFLOAT16
 
 template <>
-GKO_ATTRIBUTES GKO_INLINE complex<vendor_bf16> sqrt<vendor_bf16>(
-    const complex<vendor_bf16>& a)
+GKO_ATTRIBUTES GKO_INLINE complex<gko::vendor_bf16> sqrt<gko::vendor_bf16>(
+    const complex<gko::vendor_bf16>& a)
 {
     return sqrt(static_cast<complex<float>>(a));
 }
 
 
 template <>
-GKO_ATTRIBUTES GKO_INLINE vendor_bf16
-abs<vendor_bf16>(const complex<vendor_bf16>& z)
+GKO_ATTRIBUTES GKO_INLINE gko::vendor_bf16 abs<gko::vendor_bf16>(
+    const complex<gko::vendor_bf16>& z)
 {
     return abs(static_cast<complex<float>>(z));
 }
diff --git a/common/cuda_hip/base/types.hpp b/common/cuda_hip/base/types.hpp
@@ -5,6 +5,7 @@
 #ifndef GKO_COMMON_CUDA_HIP_BASE_TYPES_HPP_
 #define GKO_COMMON_CUDA_HIP_BASE_TYPES_HPP_
 
+#include "common/cuda_hip/base/bf16_alias.hpp"
 #include "common/cuda_hip/base/math.hpp"
 #if defined(GKO_COMPILING_CUDA)
 #include "cuda/base/types.hpp"
@@ -34,13 +35,13 @@ THRUST_HALF_FRIEND_OPERATOR(/, /=)
 #undef THRUST_HALF_FRIEND_OPERATOR
 
 
-#define THRUST_BF16_FRIEND_OPERATOR(_op, _opeq)                          \
-    GKO_ATTRIBUTES GKO_INLINE GKO_THRUST_QUALIFIER::complex<vendor_bf16> \
-    operator _op(const GKO_THRUST_QUALIFIER::complex<vendor_bf16> lhs,   \
-                 const GKO_THRUST_QUALIFIER::complex<vendor_bf16> rhs)   \
-    {                                                                    \
-        return GKO_THRUST_QUALIFIER::complex<float>{                     \
-            lhs} _op GKO_THRUST_QUALIFIER::complex<float>(rhs);          \
+#define THRUST_BF16_FRIEND_OPERATOR(_op, _opeq)                               \
+    GKO_ATTRIBUTES GKO_INLINE GKO_THRUST_QUALIFIER::complex<gko::vendor_bf16> \
+    operator _op(const GKO_THRUST_QUALIFIER::complex<gko::vendor_bf16> lhs,   \
+                 const GKO_THRUST_QUALIFIER::complex<gko::vendor_bf16> rhs)   \
+    {                                                                         \
+        return GKO_THRUST_QUALIFIER::complex<float>{                          \
+            lhs} _op GKO_THRUST_QUALIFIER::complex<float>(rhs);               \
     }
 
 THRUST_BF16_FRIEND_OPERATOR(+, +=)
diff --git a/core/test/base/bfloat16.cpp b/core/test/base/bfloat16.cpp
@@ -134,22 +134,6 @@ TEST(FloatToBFloat16, TruncatesLargeNumberRoundToEven)
 }
 
 
-// TEST(FloatToBFloat16, Convert)
-// {
-//     float rho = 86.25;
-//     float beta = 1110;
-//     auto float_res = rho/beta;
-//     gko::bfloat16 rho_h = rho;
-//     gko::bfloat16 beta_h = beta;
-//     auto bfloat16_res = rho_h/beta_h;
-//     std::cout << float_res << std::endl;
-//     std::cout << float(bfloat16_res) << std::endl;
-
-//     std::complex<gko::bfloat16> cpx{100.0, 0.0};
-//     std::cout << float(gko::squared_norm(cpx)) << std::endl;
-// }
-
-
 TEST(Bfloat16ToFloat, ConvertsOne)
 {
     float x = create_from_bits<bfloat16>("0" "01111111" "0000000");
diff --git a/dpcpp/base/bf16_alias.hpp b/dpcpp/base/bf16_alias.hpp
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: 2025 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_DPCPP_BASE_BF16_ALIAS_HPP_
+#define GKO_DPCPP_BASE_BF16_ALIAS_HPP_
+
+#include <sycl/ext/oneapi/bfloat16.hpp>
+
+namespace gko {
+
+// Vendor bfloat16 alias for DPC++/SYCL builds.
+using vendor_bf16 = sycl::ext::oneapi::bfloat16;
+
+
+}  // namespace gko
+
+
+#endif  // GKO_DPCPP_BASE_BF16_ALIAS_HPP_
diff --git a/dpcpp/base/complex.hpp b/dpcpp/base/complex.hpp
@@ -7,11 +7,12 @@
 
 #include <complex>
 
-#include <sycl/ext/oneapi/bfloat16.hpp>
 #include <sycl/half_type.hpp>
 
 #include <ginkgo/config.hpp>
 
+#include "dpcpp/base/bf16_alias.hpp"
+
 
 namespace gko {
 
@@ -204,9 +205,9 @@ class complex<sycl::half> {
 
 
 template <>
-class complex<sycl::ext::oneapi::bfloat16> {
+class complex<vendor_bf16> {
 public:
-    using value_type = sycl::ext::oneapi::bfloat16;
+    using value_type = vendor_bf16;
 
     complex(const value_type& real = value_type(0.f),
             const value_type& imag = value_type(0.f))
diff --git a/dpcpp/base/math.hpp b/dpcpp/base/math.hpp
@@ -9,11 +9,11 @@
 #include <cmath>
 
 #include <sycl/bit_cast.hpp>
-#include <sycl/ext/oneapi/bfloat16.hpp>
 #include <sycl/half_type.hpp>
 
 #include <ginkgo/core/base/math.hpp>
 
+#include "dpcpp/base/bf16_alias.hpp"
 #include "dpcpp/base/complex.hpp"
 #include "dpcpp/base/dpct.hpp"
 
@@ -31,8 +31,8 @@ struct basic_float_traits<sycl::half> {
 };
 
 template <>
-struct basic_float_traits<sycl::ext::oneapi::bfloat16> {
-    using type = sycl::ext::oneapi::bfloat16;
+struct basic_float_traits<vendor_bf16> {
+    using type = vendor_bf16;
     static constexpr int sign_bits = 1;
     static constexpr int significand_bits = 7;
     static constexpr int exponent_bits = 8;
@@ -44,8 +44,7 @@ template <>
 struct is_complex_or_scalar_impl<sycl::half> : public std::true_type {};
 
 template <>
-struct is_complex_or_scalar_impl<sycl::ext::oneapi::bfloat16>
-    : public std::true_type {};
+struct is_complex_or_scalar_impl<vendor_bf16> : public std::true_type {};
 
 template <typename ValueType>
 struct complex_helper {
@@ -58,8 +57,8 @@ struct complex_helper<sycl::half> {
 };
 
 template <>
-struct complex_helper<sycl::ext::oneapi::bfloat16> {
-    using type = gko::complex<sycl::ext::oneapi::bfloat16>;
+struct complex_helper<vendor_bf16> {
+    using type = gko::complex<vendor_bf16>;
 };
 
 
@@ -105,22 +104,22 @@ struct device_numeric_limits {
 // constructor. we use sycl::bit_cast (not guarenteed be constexpr) to create
 // the corresponding bfloat16
 template <>
-struct device_numeric_limits<sycl::ext::oneapi::bfloat16> {
+struct device_numeric_limits<vendor_bf16> {
     static GKO_ATTRIBUTES GKO_INLINE auto inf()
     {
-        return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(
+        return sycl::bit_cast<vendor_bf16>(
             static_cast<unsigned short>(0b0'11111111'0000000u));
     }
 
     static GKO_ATTRIBUTES GKO_INLINE auto max()
     {
-        return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(
+        return sycl::bit_cast<vendor_bf16>(
             static_cast<unsigned short>(0b0'11111110'1111111u));
     }
 
     static GKO_ATTRIBUTES GKO_INLINE auto min()
     {
-        return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(
+        return sycl::bit_cast<vendor_bf16>(
             static_cast<unsigned short>(0b0'00000001'0000000u));
     }
 };
@@ -170,51 +169,45 @@ bool __dpct_inline__ is_finite(const gko::complex<sycl::half>& value)
 }
 
 
-bool __dpct_inline__ is_nan(const sycl::ext::oneapi::bfloat16& val)
+bool __dpct_inline__ is_nan(const vendor_bf16& val)
 {
     return std::isnan(static_cast<float>(val));
 }
 
-bool __dpct_inline__
-is_nan(const gko::complex<sycl::ext::oneapi::bfloat16>& val)
+bool __dpct_inline__ is_nan(const gko::complex<vendor_bf16>& val)
 {
     return is_nan(val.real()) || is_nan(val.imag());
 }
 
 
-sycl::ext::oneapi::bfloat16 __dpct_inline__
-abs(const sycl::ext::oneapi::bfloat16& val)
+vendor_bf16 __dpct_inline__ abs(const vendor_bf16& val)
 {
     return abs(static_cast<float>(val));
 }
 
-sycl::ext::oneapi::bfloat16 __dpct_inline__
-abs(const gko::complex<sycl::ext::oneapi::bfloat16>& val)
+vendor_bf16 __dpct_inline__ abs(const gko::complex<vendor_bf16>& val)
 {
     return abs(static_cast<std::complex<float>>(val));
 }
 
-sycl::ext::oneapi::bfloat16 __dpct_inline__
-sqrt(const sycl::ext::oneapi::bfloat16& val)
+vendor_bf16 __dpct_inline__ sqrt(const vendor_bf16& val)
 {
     return sqrt(static_cast<float>(val));
 }
 
-gko::complex<sycl::ext::oneapi::bfloat16> __dpct_inline__
-sqrt(const gko::complex<sycl::ext::oneapi::bfloat16>& val)
+gko::complex<vendor_bf16> __dpct_inline__
+sqrt(const gko::complex<vendor_bf16>& val)
 {
     return sqrt(static_cast<std::complex<float>>(val));
 }
 
 
-bool __dpct_inline__ is_finite(const sycl::ext::oneapi::bfloat16& value)
+bool __dpct_inline__ is_finite(const vendor_bf16& value)
 {
-    return abs(value) <
-           device_numeric_limits<sycl::ext::oneapi::bfloat16>::inf();
+    return abs(value) < device_numeric_limits<vendor_bf16>::inf();
 }
 
-bool __dpct_inline__
-is_finite(const gko::complex<sycl::ext::oneapi::bfloat16>& value)
+bool __dpct_inline__ is_finite(const gko::complex<vendor_bf16>& value)
 {
     return is_finite(value.real()) && is_finite(value.imag());
 }
diff --git a/dpcpp/base/types.hpp b/dpcpp/base/types.hpp
@@ -15,6 +15,7 @@
 #include <ginkgo/core/base/matrix_data.hpp>
 #include <ginkgo/core/base/types.hpp>
 
+#include "dpcpp/base/bf16_alias.hpp"
 #include "dpcpp/base/complex.hpp"
 
 
@@ -56,7 +57,7 @@ struct sycl_type_impl<half> {
 
 template <>
 struct sycl_type_impl<bfloat16> {
-    using type = sycl::ext::oneapi::bfloat16;
+    using type = vendor_bf16;
 };
 
 template <typename T>
diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp
diff --git a/hip/test/base/math.hip.cpp b/hip/test/base/math.hip.cpp

Original file line number	Diff line number	Diff line change
`@@ -9,11 +9,11 @@`
`9`	`9`	`#include <cmath>`
`10`	`10`
`11`	`11`	`#include <sycl/bit_cast.hpp>`
`12`		`-#include <sycl/ext/oneapi/bfloat16.hpp>`
`13`	`12`	`#include <sycl/half_type.hpp>`
`14`	`13`
`15`	`14`	`#include <ginkgo/core/base/math.hpp>`
`16`	`15`
	`16`	`+#include "dpcpp/base/bf16_alias.hpp"`
`17`	`17`	`#include "dpcpp/base/complex.hpp"`
`18`	`18`	`#include "dpcpp/base/dpct.hpp"`
`19`	`19`
`@@ -31,8 +31,8 @@ struct basic_float_traits<sycl::half> {`
`31`	`31`	`};`
`32`	`32`
`33`	`33`	`template <>`
`34`		`-struct basic_float_traits<sycl::ext::oneapi::bfloat16> {`
`35`		`- using type = sycl::ext::oneapi::bfloat16;`
	`34`	`+struct basic_float_traits<vendor_bf16> {`
	`35`	`+ using type = vendor_bf16;`
`36`	`36`	`static constexpr int sign_bits = 1;`
`37`	`37`	`static constexpr int significand_bits = 7;`
`38`	`38`	`static constexpr int exponent_bits = 8;`
`@@ -44,8 +44,7 @@ template <>`
`44`	`44`	`struct is_complex_or_scalar_impl<sycl::half> : public std::true_type {};`
`45`	`45`
`46`	`46`	`template <>`
`47`		`-struct is_complex_or_scalar_impl<sycl::ext::oneapi::bfloat16>`
`48`		`- : public std::true_type {};`
	`47`	`+struct is_complex_or_scalar_impl<vendor_bf16> : public std::true_type {};`
`49`	`48`
`50`	`49`	`template <typename ValueType>`
`51`	`50`	`struct complex_helper {`
`@@ -58,8 +57,8 @@ struct complex_helper<sycl::half> {`
`58`	`57`	`};`
`59`	`58`
`60`	`59`	`template <>`
`61`		`-struct complex_helper<sycl::ext::oneapi::bfloat16> {`
`62`		`- using type = gko::complex<sycl::ext::oneapi::bfloat16>;`
	`60`	`+struct complex_helper<vendor_bf16> {`
	`61`	`+ using type = gko::complex<vendor_bf16>;`
`63`	`62`	`};`
`64`	`63`
`65`	`64`
`@@ -105,22 +104,22 @@ struct device_numeric_limits {`
`105`	`104`	`// constructor. we use sycl::bit_cast (not guarenteed be constexpr) to create`
`106`	`105`	`// the corresponding bfloat16`
`107`	`106`	`template <>`
`108`		`-struct device_numeric_limits<sycl::ext::oneapi::bfloat16> {`
	`107`	`+struct device_numeric_limits<vendor_bf16> {`
`109`	`108`	`static GKO_ATTRIBUTES GKO_INLINE auto inf()`
`110`	`109`	`{`
`111`		`- return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(`
	`110`	`+ return sycl::bit_cast<vendor_bf16>(`
`112`	`111`	`static_cast<unsigned short>(0b0'11111111'0000000u));`
`113`	`112`	`}`
`114`	`113`
`115`	`114`	`static GKO_ATTRIBUTES GKO_INLINE auto max()`
`116`	`115`	`{`
`117`		`- return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(`
	`116`	`+ return sycl::bit_cast<vendor_bf16>(`
`118`	`117`	`static_cast<unsigned short>(0b0'11111110'1111111u));`
`119`	`118`	`}`
`120`	`119`
`121`	`120`	`static GKO_ATTRIBUTES GKO_INLINE auto min()`
`122`	`121`	`{`
`123`		`- return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(`
	`122`	`+ return sycl::bit_cast<vendor_bf16>(`
`124`	`123`	`static_cast<unsigned short>(0b0'00000001'0000000u));`
`125`	`124`	`}`
`126`	`125`	`};`
`@@ -170,51 +169,45 @@ bool __dpct_inline__ is_finite(const gko::complex<sycl::half>& value)`
`170`	`169`	`}`
`171`	`170`
`172`	`171`
`173`		`-bool __dpct_inline__ is_nan(const sycl::ext::oneapi::bfloat16& val)`
	`172`	`+bool __dpct_inline__ is_nan(const vendor_bf16& val)`
`174`	`173`	`{`
`175`	`174`	`return std::isnan(static_cast<float>(val));`
`176`	`175`	`}`
`177`	`176`
`178`		`-bool __dpct_inline__`
`179`		`-is_nan(const gko::complex<sycl::ext::oneapi::bfloat16>& val)`
	`177`	`+bool __dpct_inline__ is_nan(const gko::complex<vendor_bf16>& val)`
`180`	`178`	`{`
`181`	`179`	`return is_nan(val.real()) \|\| is_nan(val.imag());`
`182`	`180`	`}`
`183`	`181`
`184`	`182`
`185`		`-sycl::ext::oneapi::bfloat16 __dpct_inline__`
`186`		`-abs(const sycl::ext::oneapi::bfloat16& val)`
	`183`	`+vendor_bf16 __dpct_inline__ abs(const vendor_bf16& val)`
`187`	`184`	`{`
`188`	`185`	`return abs(static_cast<float>(val));`
`189`	`186`	`}`
`190`	`187`
`191`		`-sycl::ext::oneapi::bfloat16 __dpct_inline__`
`192`		`-abs(const gko::complex<sycl::ext::oneapi::bfloat16>& val)`
	`188`	`+vendor_bf16 __dpct_inline__ abs(const gko::complex<vendor_bf16>& val)`
`193`	`189`	`{`
`194`	`190`	`return abs(static_cast<std::complex<float>>(val));`
`195`	`191`	`}`
`196`	`192`
`197`		`-sycl::ext::oneapi::bfloat16 __dpct_inline__`
`198`		`-sqrt(const sycl::ext::oneapi::bfloat16& val)`
	`193`	`+vendor_bf16 __dpct_inline__ sqrt(const vendor_bf16& val)`
`199`	`194`	`{`
`200`	`195`	`return sqrt(static_cast<float>(val));`
`201`	`196`	`}`
`202`	`197`
`203`		`-gko::complex<sycl::ext::oneapi::bfloat16> __dpct_inline__`
`204`		`-sqrt(const gko::complex<sycl::ext::oneapi::bfloat16>& val)`
	`198`	`+gko::complex<vendor_bf16> __dpct_inline__`
	`199`	`+sqrt(const gko::complex<vendor_bf16>& val)`
`205`	`200`	`{`
`206`	`201`	`return sqrt(static_cast<std::complex<float>>(val));`
`207`	`202`	`}`
`208`	`203`
`209`	`204`
`210`		`-bool __dpct_inline__ is_finite(const sycl::ext::oneapi::bfloat16& value)`
	`205`	`+bool __dpct_inline__ is_finite(const vendor_bf16& value)`
`211`	`206`	`{`
`212`		`- return abs(value) <`
`213`		`- device_numeric_limits<sycl::ext::oneapi::bfloat16>::inf();`
	`207`	`+ return abs(value) < device_numeric_limits<vendor_bf16>::inf();`
`214`	`208`	`}`
`215`	`209`
`216`		`-bool __dpct_inline__`
`217`		`-is_finite(const gko::complex<sycl::ext::oneapi::bfloat16>& value)`
	`210`	`+bool __dpct_inline__ is_finite(const gko::complex<vendor_bf16>& value)`
`218`	`211`	`{`
`219`	`212`	`return is_finite(value.real()) && is_finite(value.imag());`
`220`	`213`	`}`