@@ -125,6 +125,14 @@ def convert_dtype_to_torch_type(self, dtype):
125125 return torch .complex64
126126 elif dtype in ["complex128" , numpy .complex128 ]:
127127 return torch .complex128
128+ elif dtype in ["float8_e4m3fn" ]:
129+ if hasattr (torch , "float8_e4m3fn" ):
130+ return torch .float8_e4m3fn
131+ return torch .float32 # fallback
132+ elif dtype in ["float8_e5m2" ]:
133+ if hasattr (torch , "float8_e5m2" ):
134+ return torch .float8_e5m2
135+ return torch .float32 # fallback
128136 else :
129137 raise ValueError (f"Unsupport dtype: { dtype } " )
130138
@@ -206,12 +214,15 @@ def get_numpy_tensor(self, api_config, index=None, key=None, **kwargs):
206214 if key is not None :
207215 self .key = key
208216
209- if self .dtype in ["float8_e5m2" , "float8_e4m3fn" ]:
210- print ("Warning " , self .dtype , "not supported" )
211- return
217+ # if self.dtype in ["float8_e5m2", "float8_e4m3fn"]:
218+ # print("Warning ", self.dtype, "not supported")
219+ # return
212220
213221 original_dtype = self .dtype
214- self .dtype = "float32" if self .dtype == "bfloat16" else self .dtype
222+ if self .dtype == "bfloat16" :
223+ self .dtype = "float32"
224+ elif self .dtype in ["float8_e5m2" , "float8_e4m3fn" ]:
225+ self .dtype = "float32"
215226
216227 if self .numpy_tensor is None :
217228 if api_config .api_name in not_zero_apis :
@@ -2621,39 +2632,67 @@ def get_exponent_max(value, dtype_max, default_max=5):
26212632 self .dtype
26222633 )
26232634
2635+ if original_dtype == "float8_e4m3fn" :
2636+ self .numpy_tensor = numpy .clip (self .numpy_tensor , - 448 , 448 )
2637+ elif original_dtype == "float8_e5m2" :
2638+ self .numpy_tensor = numpy .clip (self .numpy_tensor , - 57344 , 57344 )
2639+
26242640 self .dtype = original_dtype
26252641 return self .numpy_tensor
26262642
def get_paddle_tensor(self, api_config):
    """Return the cached paddle input tensor, creating it on first use.

    numpy has no native bfloat16 / float8 representation, so for those
    dtypes the tensor is first materialized as float32 (the numpy data
    produced by ``get_numpy_tensor`` is already range-clipped for float8
    upstream) and then cast down to the requested dtype with
    ``paddle.cast``.

    Args:
        api_config: forwarded to ``self.get_numpy_tensor`` to generate
            the underlying numpy data on the first call.

    Returns:
        The paddle tensor for this input, with ``stop_gradient=False``.
    """
    # dtypes numpy cannot hold directly: build as float32, cast afterwards
    needs_cast = ("bfloat16", "float8_e4m3fn", "float8_e5m2")

    if self.paddle_tensor is None:
        np_tensor = self.get_numpy_tensor(api_config)
        intermediate_dtype = "float32" if self.dtype in needs_cast else self.dtype
        self.paddle_tensor = paddle.to_tensor(
            np_tensor,
            dtype=intermediate_dtype,
            place=self.place,
        )

    # NOTE(review): stop_gradient is (re)set and the cast re-applied even on
    # cache hits — matches the original control flow; the re-cast is a no-op
    # for an already-cast cached tensor.
    self.paddle_tensor.stop_gradient = False
    if self.dtype in needs_cast:
        self.paddle_tensor = paddle.cast(self.paddle_tensor, dtype=self.dtype)
    return self.paddle_tensor
26432677
26442678 def get_torch_tensor (self , api_config ):
2645- if self .dtype in ["float8_e5m2" , "float8_e4m3fn " ]:
2646- print ("Warning " , self .dtype , "not supported" )
2647- return
2679+ # if self.dtype in ["float8_e5m2"]:
2680+ # print("Warning ", self.dtype, "not supported")
2681+ # return
26482682
26492683 device = torch .device ("cuda:0" ) if torch .cuda .is_available () else torch .device ("cpu" )
26502684 torch .set_default_device (device )
26512685 if self .torch_tensor is None :
2686+ if self .dtype in ["bfloat16" , "float8_e4m3fn" , "float8_e5m2" ]:
2687+ dtype_to_use = torch .float32
2688+ else :
2689+ dtype_to_use = self .convert_dtype_to_torch_type (self .dtype )
2690+
2691+ #print(f"[DEBUG] Preparing Torch Tensor for {self.dtype}, using initial dtype {dtype_to_use}")
2692+
26522693 self .torch_tensor = torch .tensor (
26532694 self .get_numpy_tensor (api_config ),
2654- dtype = self .convert_dtype_to_torch_type (self .dtype )
2655- if self .dtype != "bfloat16"
2656- else torch .float32 ,
2695+ dtype = dtype_to_use ,
26572696 requires_grad = self .dtype
26582697 in [
26592698 "float32" ,
@@ -2666,6 +2705,21 @@ def get_torch_tensor(self, api_config):
26662705 )
26672706 if self .dtype == "bfloat16" :
26682707 self .torch_tensor = self .torch_tensor .to (dtype = torch .bfloat16 )
2708+ elif self .dtype == "float8_e4m3fn" :
2709+ if hasattr (torch , "float8_e4m3fn" ):
2710+ #print(f"[DEBUG] Before Torch Cast (float8_e4m3fn): {self.torch_tensor.dtype} data={self.torch_tensor}", flush=True)
2711+ self .torch_tensor = self .torch_tensor .to (dtype = torch .float8_e4m3fn )
2712+ # print(f"[DEBUG] Forward Torch Input Tensor (float8_e4m3fn): {self.torch_tensor}\n[DEBUG] dtype check: {self.torch_tensor.dtype}", flush=True)
2713+ else :
2714+ print ("[DEBUG] Warning: Current torch version does not support float8_e4m3fn, keep float32/float16." , flush = True )
2715+ elif self .dtype == "float8_e5m2" :
2716+ if hasattr (torch , "float8_e5m2" ):
2717+ #print(f"[DEBUG] Before Torch Cast (float8_e5m2): {self.torch_tensor.dtype} data={self.torch_tensor}", flush=True)
2718+ self .torch_tensor = self .torch_tensor .to (dtype = torch .float8_e5m2 )
2719+ # print(f"[DEBUG] Forward Torch Input Tensor (float8_e5m2): {self.torch_tensor}\n[DEBUG] dtype check: {self.torch_tensor.dtype}", flush=True)
2720+ else :
2721+ print ("[DEBUG] Warning: Current torch version does not support float8_e5m2, keep float32/float16." , flush = True )
2722+
26692723 return self .torch_tensor
26702724
26712725 def clear_tensor (self ):
0 commit comments