Skip to content

Commit 7ca0c9c

Browse files
authored
hip: use fnuz fp8 for conversion on CDNA3 (ggml-org#21040)
1 parent 8c60b8a commit 7ca0c9c

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

ggml/src/ggml-cuda/common.cuh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,7 +802,13 @@ static __device__ __forceinline__ float ggml_cuda_e8m0_to_fp32(uint8_t x) {
802802
static __device__ __forceinline__ float ggml_cuda_ue4m3_to_fp32(uint8_t x) {
803803
#ifdef FP8_AVAILABLE
804804
const uint32_t bits = x * (x != 0x7F && x != 0xFF); // Convert NaN to 0.0f to match CPU implementation.
805+
#if defined(GGML_USE_HIP) && defined(CDNA3)
806+
// ROCm does not support fp8 in software on devices with fp8 hardware,
807+
// but CDNA3 supports only e4m3_fnuz (no inf).
808+
const __hip_fp8_e4m3_fnuz xf = *reinterpret_cast<const __hip_fp8_e4m3_fnuz *>(&bits);
809+
#else
805810
const __nv_fp8_e4m3 xf = *reinterpret_cast<const __nv_fp8_e4m3 *>(&bits);
811+
#endif // defined(GGML_USE_HIP) && defined(CDNA3)
806812
return static_cast<float>(xf) / 2;
807813
#else
808814
NO_DEVICE_CODE;

0 commit comments

Comments
 (0)