Skip to content

Commit c402d49

Browse files
committed
bump rocm support 6.0.2+
1 parent 89502f9 commit c402d49

13 files changed

Lines changed: 21 additions & 236 deletions

File tree

.gitlab-ci.yml

Lines changed: 2 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -226,88 +226,7 @@ build/cuda131/nompi/gcc/cuda/release/shared:
226226
BUILD_TYPE: "Release"
227227
MODULE_LOAD: "cmake/3.30.8 cuda/13.1.1 gcc/14.3.0"
228228

229-
# ROCm 4.5 and friends
230-
build/amd/nompi/gcc/rocm45/release/shared:
231-
extends:
232-
- .build_and_test_template
233-
- .default_variables
234-
- .quick_test_condition
235-
- .use_gko-rocm45-nompi-gnu8-llvm8
236-
variables:
237-
BUILD_OMP: "ON"
238-
BUILD_HIP: "ON"
239-
NONDEFAULT_STREAM: "ON"
240-
BUILD_TYPE: "Release"
241-
BUILD_HWLOC: "OFF"
242-
243-
build/amd/nompi/clang/rocm45/release/static:
244-
extends:
245-
- .build_and_test_template
246-
- .default_variables
247-
- .full_test_condition
248-
- .use_gko-rocm45-nompi-gnu8-llvm8
249-
variables:
250-
CXX_COMPILER: "clang++"
251-
BUILD_OMP: "ON"
252-
BUILD_HIP: "ON"
253-
BUILD_TYPE: "Release"
254-
BUILD_SHARED_LIBS: "OFF"
255-
ENABLE_HALF: "OFF"
256-
ENABLE_BFLOAT16: "ON"
257-
258-
build/amd/nompi/clang/rocm45/debug/shared:
259-
extends:
260-
- .build_and_test_template
261-
- .default_variables
262-
- .full_test_condition
263-
- .use_gko-rocm45-nompi-gnu8-llvm8
264-
variables:
265-
CXX_COMPILER: "clang++"
266-
BUILD_OMP: "ON"
267-
BUILD_HIP: "ON"
268-
BUILD_TYPE: "Debug"
269-
270-
# ROCm 5.1.4 and friends
271-
build/amd/nompi/gcc/rocm514/debug/static:
272-
extends:
273-
- .build_and_test_template
274-
- .default_variables
275-
- .full_test_condition
276-
- .use_gko-rocm514-nompi-gnu11-llvm11
277-
variables:
278-
BUILD_OMP: "ON"
279-
BUILD_HIP: "ON"
280-
BUILD_TYPE: "Debug"
281-
BUILD_SHARED_LIBS: "OFF"
282-
283-
build/amd/nompi/clang/rocm514/release/shared:
284-
extends:
285-
- .build_and_test_template
286-
- .default_variables
287-
- .full_test_condition
288-
- .use_gko-rocm514-nompi-gnu11-llvm11
289-
variables:
290-
CXX_COMPILER: "clang++"
291-
BUILD_OMP: "ON"
292-
BUILD_HIP: "ON"
293-
BUILD_TYPE: "Release"
294-
ENABLE_HALF: "ON"
295-
ENABLE_BFLOAT16: "ON"
296-
297-
# without omp
298-
build/amd/nompi/gcc/rocm514_wo_omp/release/shared:
299-
extends:
300-
- .build_and_test_template
301-
- .default_variables
302-
- .full_test_condition
303-
- .use_gko-rocm514-nompi-gnu11-llvm11
304-
variables:
305-
BUILD_OMP: "OFF"
306-
BUILD_MPI: "OFF"
307-
BUILD_HIP: "ON"
308-
BUILD_TYPE: "Release"
309-
310-
build/amd/openmpi/gcc/rocm600/release/static:
229+
build/amd/openmpi/gcc/rocm602/release/static:
311230
extends:
312231
- .build_and_test_tum_template
313232
- .default_variables
@@ -319,7 +238,7 @@ build/amd/openmpi/gcc/rocm600/release/static:
319238
BUILD_MPI: "ON"
320239
BUILD_TYPE: "Release"
321240
BUILD_SHARED_LIBS: "OFF"
322-
MODULE_LOAD: "cmake/3.24.4 rocm/6.0.0 gcc/13.3.0 openmpi/5.0.7"
241+
MODULE_LOAD: "cmake/3.24.4 rocm/6.0.2 gcc/13.3.0 openmpi/5.0.7"
323242

324243
build/amd/nompi/gcc/rocm720/release/shared:
325244
extends:

CMakeLists.txt

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -226,15 +226,7 @@ if(GINKGO_BUILD_CUDA)
226226
endif()
227227
if(GINKGO_BUILD_HIP)
228228
include(cmake/hip.cmake)
229-
if(GINKGO_HIP_PLATFORM_AMD AND GINKGO_HIP_VERSION VERSION_LESS 5.7)
230-
# Hip allow custom namespace but does not fully make everything in the custom namespace before rocm-5.7
231-
# more specific pr: https://github.com/ROCm/rocThrust/pull/286
232-
message(
233-
STATUS
234-
"Disable custom thrust namespace for hip before 5.7 because hip does not fully support it before 5.7"
235-
)
236-
set(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE OFF)
237-
elseif(
229+
if(
238230
GINKGO_HIP_PLATFORM_AMD
239231
AND GINKGO_HIP_VERSION VERSION_GREATER_EQUAL 7.1
240232
)

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ The Ginkgo CUDA module has the following __additional__ requirements:
5454

5555
The Ginkgo HIP module has the following __additional__ requirements:
5656

57-
* _ROCm 4.5+_
57+
* _ROCm 6.0.2+_
5858
* the HIP, hipBLAS, hipSPARSE, hip/rocRAND and rocThrust packages compiled with the ROCm backend
5959
* if the hipFFT package is available, it is used to implement the FFT LinOps.
6060
* _cmake 3.21+_

accessor/hip_helper.hpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -74,11 +74,7 @@ struct hip_type<gko::half> {
7474

7575
template <>
7676
struct hip_type<gko::bfloat16> {
77-
#if HIP_VERSION >= 60200000
7877
using type = __hip_bfloat16;
79-
#else
80-
using type = hip_bfloat16;
81-
#endif
8278
};
8379

8480

cmake/hip.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ find_package(rocrand REQUIRED)
3434
find_package(rocthrust REQUIRED)
3535
find_package(ROCTX)
3636

37-
if(GINKGO_HIP_AMD_UNSAFE_ATOMIC AND GINKGO_HIP_VERSION VERSION_GREATER_EQUAL 5)
37+
if(GINKGO_HIP_AMD_UNSAFE_ATOMIC)
3838
set(CMAKE_HIP_FLAGS
3939
"${CMAKE_HIP_FLAGS} -munsafe-fp-atomics -Wno-unused-command-line-argument"
4040
)

cmake/hip_helpers.cmake

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,3 @@ function(ginkgo_find_hip_version)
2323
)
2424
set(GINKGO_HIP_VERSION ${GINKGO_HIP_VERSION} PARENT_SCOPE)
2525
endfunction()
26-
27-
# This function checks if ROCm might not be detected correctly.
28-
# ROCm < 5.7 has a faulty CMake setup that requires setting
29-
# CMAKE_PREFIX_PATH=$ROCM_PATH/lib/cmake, otherwise HIP will not be detected.
30-
function(ginkgo_check_hip_detection_issue)
31-
if(NOT CMAKE_HIP_COMPILER)
32-
ginkgo_find_hip_version()
33-
if(GINKGO_HIP_VERSION AND GINKGO_HIP_VERSION VERSION_LESS 5.7)
34-
message(
35-
WARNING
36-
"Could not find a HIP compiler, but HIP version ${GINKGO_HIP_VERSION} was detected through "
37-
"hipconfig. Try setting the environment variable CMAKE_PREFIX_PATH=$ROCM_PATH/lib/cmake, or "
38-
"update to ROCm >= 5.7."
39-
)
40-
endif()
41-
endif()
42-
endfunction()

common/cuda_hip/base/bf16_alias.hpp

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2025 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -24,7 +24,6 @@ using vendor_bf16 = __nv_bfloat16;
2424
#elif defined(GKO_COMPILING_HIP)
2525

2626

27-
#if HIP_VERSION >= 60200000
2827
// HIP has __hip_bfloat16 after ROCm 5.6.0, but it only offers enough for us
2928
// (conversions and operator overloads) starting with ROCm 6.2.0, which adds
3029
// more native operation support.
@@ -39,23 +38,5 @@ using vendor_bf16 = __hip_bfloat16;
3938
}
4039

4140

42-
#else
43-
44-
45-
// HIP has hip_bfloat16 but only the type with the operation fallback to the
46-
// single precision
47-
#include <hip/hip_bfloat16.h>
48-
49-
50-
namespace gko {
51-
52-
53-
using vendor_bf16 = hip_bfloat16;
54-
55-
56-
}
57-
58-
59-
#endif
6041
#endif
6142
#endif // GKO_COMMON_CUDA_HIP_BASE_BF16_ALIAS_HPP_

common/cuda_hip/base/math.hpp

Lines changed: 5 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -68,7 +68,7 @@ struct device_numeric_limits<__half> {
6868
};
6969

7070

71-
#if defined(GKO_COMPILING_CUDA)
71+
#ifdef GKO_COMPILING_CUDA
7272

7373

7474
template <>
@@ -101,9 +101,6 @@ struct device_numeric_limits<__nv_bfloat16> {
101101
#ifdef GKO_COMPILING_HIP
102102

103103

104-
#if HIP_VERSION >= 60200000
105-
106-
107104
template <>
108105
struct device_numeric_limits<__hip_bfloat16> {
109106
static GKO_ATTRIBUTES GKO_INLINE auto inf()
@@ -129,35 +126,6 @@ struct device_numeric_limits<__hip_bfloat16> {
129126
};
130127

131128

132-
#else
133-
134-
135-
template <>
136-
struct device_numeric_limits<hip_bfloat16> {
137-
static GKO_ATTRIBUTES GKO_INLINE auto inf()
138-
{
139-
hip_bfloat16 vals;
140-
vals.data = static_cast<uint16>(0b0'11111111'0000000u);
141-
return vals;
142-
}
143-
144-
static GKO_ATTRIBUTES GKO_INLINE auto max()
145-
{
146-
hip_bfloat16 vals;
147-
vals.data = static_cast<uint16>(0b0'11111110'1111111u);
148-
return vals;
149-
}
150-
151-
static GKO_ATTRIBUTES GKO_INLINE auto min()
152-
{
153-
hip_bfloat16 vals;
154-
vals.data = static_cast<uint16>(0b0'00000001'0000000u);
155-
return vals;
156-
}
157-
};
158-
159-
160-
#endif
161129
#endif
162130

163131
namespace detail {
@@ -375,8 +343,7 @@ __device__ __forceinline__ bool is_finite(const thrust::complex<__half>& value)
375343
__device__ __forceinline__ bool is_nan(const vendor_bf16& val)
376344
{
377345
// from the cuda_bf16.hpp, amd_hip_bf16.h
378-
#if GINKGO_HIP_PLATFORM_HCC && HIP_VERSION >= 60200000 || \
379-
(defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
346+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
380347
return __hisnan(val);
381348
#else
382349
return isnan(static_cast<float>(val));
@@ -391,8 +358,7 @@ __device__ __forceinline__ bool is_nan(const thrust::complex<vendor_bf16>& val)
391358

392359
__device__ __forceinline__ vendor_bf16 abs(const vendor_bf16& val)
393360
{
394-
#if GINKGO_HIP_PLATFORM_HCC && HIP_VERSION >= 60200000 || \
395-
(defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
361+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
396362
return __habs(val);
397363
#else
398364
return static_cast<vendor_bf16>(abs(static_cast<float>(val)));
@@ -402,8 +368,7 @@ __device__ __forceinline__ vendor_bf16 abs(const vendor_bf16& val)
402368

403369
__device__ __forceinline__ vendor_bf16 sqrt(const vendor_bf16& val)
404370
{
405-
#if GINKGO_HIP_PLATFORM_HCC && HIP_VERSION >= 60200000 || \
406-
(defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
371+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800)
407372
return hsqrt(val);
408373
#else
409374
return static_cast<vendor_bf16>(sqrt(static_cast<float>(val)));
@@ -424,28 +389,7 @@ __device__ __forceinline__ bool is_finite(
424389
return is_finite(value.real()) && is_finite(value.imag());
425390
}
426391

427-
#if defined(GKO_COMPILING_HIP) && HIP_VERSION < 60200000
428-
429-
430-
// hip_bfloat16 does not have a constexpr constructor from int
431-
template <>
432-
GKO_INLINE vendor_bf16 one<vendor_bf16>()
433-
{
434-
vendor_bf16 val;
435-
val.data = static_cast<uint16>(0b0'01111111'0000000u);
436-
return val;
437-
}
438-
439-
// hip_bfloat16 does not have an implicit conversion from float
440-
template <>
441-
GKO_INLINE thrust::complex<vendor_bf16> one<thrust::complex<vendor_bf16>>()
442-
{
443-
thrust::complex<vendor_bf16> val(one<vendor_bf16>());
444-
return val;
445-
}
446-
447392

448-
#endif
449393
#endif // GINKGO_ENABLE_BFLOAT16
450394
#endif // defined(__CUDACC__) || defined(GKO_COMPILING_HIP)
451395

common/unified/components/fill_array_kernels.cpp

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -42,20 +42,8 @@ void fill_seq_array(std::shared_ptr<const DefaultExecutor> exec,
4242
run_kernel(
4343
exec,
4444
[] GKO_KERNEL(auto idx, auto array) {
45-
#if defined(GKO_COMPILING_HIP) && HIP_VERSION < 60200000
46-
if constexpr (std::is_same_v<remove_complex<ValueType>, bfloat16>) {
47-
// hip_bfloat16 does not have implicit conversion, so the
48-
// thrust<hip_bfloat16> can not be from float. Also,
49-
// hip_bfloat16 does not have operator=(float) before 5.4. Thus,
50-
// we cast twice via float before 6.2
51-
array[idx] = static_cast<hip_bfloat16>(static_cast<float>(idx));
52-
53-
} else
54-
#endif
55-
if constexpr (std::is_same_v<remove_complex<ValueType>,
56-
float16> ||
57-
std::is_same_v<remove_complex<ValueType>,
58-
bfloat16>) {
45+
if constexpr (std::is_same_v<remove_complex<ValueType>, float16> ||
46+
std::is_same_v<remove_complex<ValueType>, bfloat16>) {
5947
// __half cannot be converted from int64_t
6048
// __hip_bfloat16 cannot be converted from long long
6149
array[idx] = static_cast<float>(idx);

common/unified/components/precision_conversion_kernels.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2026 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -20,8 +20,7 @@ void convert_precision(std::shared_ptr<const DefaultExecutor> exec,
2020
run_kernel(
2121
exec,
2222
[] GKO_KERNEL(auto idx, auto in, auto out) {
23-
#if defined(GKO_COMPILING_DPCPP) || \
24-
(defined(GKO_COMPILING_HIP) && HIP_VERSION >= 60200000) || \
23+
#if defined(GKO_COMPILING_DPCPP) || defined(GKO_COMPILING_HIP) || \
2524
(defined(CUDA_VERSION) && CUDA_VERSION < 12020)
2625
using bridge_type =
2726
device_type<highest_precision<SourceType, TargetType>>;

0 commit comments

Comments
 (0)