File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -802,7 +802,13 @@ static __device__ __forceinline__ float ggml_cuda_e8m0_to_fp32(uint8_t x) {
802802static __device__ __forceinline__ float ggml_cuda_ue4m3_to_fp32 (uint8_t x) {
803803#ifdef FP8_AVAILABLE
804804 const uint32_t bits = x * (x != 0x7F && x != 0xFF ); // Convert NaN to 0.0f to match CPU implementation.
805+ #if defined(GGML_USE_HIP) && defined(CDNA3)
806+ // ROCm does not support fp8 in software on devices with fp8 hardware,
807+ // but CDNA3 supports only e4m3_fnuz (no inf).
808+ const __hip_fp8_e4m3_fnuz xf = *reinterpret_cast <const __hip_fp8_e4m3_fnuz *>(&bits);
809+ #else
805810 const __nv_fp8_e4m3 xf = *reinterpret_cast <const __nv_fp8_e4m3 *>(&bits);
811+ #endif // defined(GGML_USE_HIP) && defined(CDNA3)
806812 return static_cast <float >(xf) / 2 ;
807813#else
808814 NO_DEVICE_CODE;
You can’t perform that action at this time.
0 commit comments