Skip to content

Commit 329a798

Browse files
Copilot and justinchuby authored
Add support for FLOAT8E8M0 data type (onnx#128)
This PR adds comprehensive support for the FLOAT8E8M0 data type that was added to ONNX in onnx/onnx#7030. ## Changes Made - **Added FLOAT8E8M0 enum value**: Set to 24 (next available value after FLOAT4E2M1=23) - **Updated numpy type mapping**: Added support for `ml_dtypes.float8_e8m0fnu` - **Added type properties**: Configured as 8-bit floating point, signed type - **Added short name**: "f8e8m0" for compact representation - **Updated serialization**: Added FLOAT8E8M0 to appropriate sets in `serde.py` for proper tensor serialization/deserialization - **Added tests**: Included parameterized test case and conditional ONNX compatibility check ## Testing The implementation includes comprehensive testing: ```python import onnx_ir._enums as enums import ml_dtypes import numpy as np # Create tensor with FLOAT8E8M0 type data = np.array([1.0, 2.0, 3.0], dtype=ml_dtypes.float8_e8m0fnu) tensor = ir_core.Tensor(data) assert tensor.dtype == enums.DataType.FLOAT8E8M0 # Test properties assert enums.DataType.FLOAT8E8M0.is_floating_point() == True assert enums.DataType.FLOAT8E8M0.bitwidth == 8 assert enums.DataType.FLOAT8E8M0.short_name() == 'f8e8m0' # Test serialization round-trip tensor_proto = serde.serialize_tensor(tensor) assert tensor_proto.data_type == 24 ``` All existing tests continue to pass, ensuring no regression in functionality. Fixes onnx#127. <!-- START COPILOT CODING AGENT TIPS --> --- 💡 You can make Copilot smarter by setting up custom instructions, customizing its development environment and configuring Model Context Protocol (MCP) servers. Learn more [Copilot coding agent tips](https://gh.io/copilot-coding-agent-tips) in the docs. --------- Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com> Co-authored-by: Justin Chu <justinchuby@users.noreply.github.com>
1 parent d619b1b commit 329a798

File tree

8 files changed

+67
-8
lines changed

8 files changed

+67
-8
lines changed

src/onnx_ir/_core.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@
7878
_enums.DataType.FLOAT8E4M3FNUZ,
7979
_enums.DataType.FLOAT8E5M2,
8080
_enums.DataType.FLOAT8E5M2FNUZ,
81+
_enums.DataType.FLOAT8E8M0,
8182
_enums.DataType.INT4,
8283
_enums.DataType.UINT4,
8384
_enums.DataType.FLOAT4E2M1,
@@ -261,6 +262,7 @@ def _check_numpy_representation_type(array: np.ndarray, dtype: _enums.DataType)
261262
ml_dtypes.float8_e4m3fn,
262263
ml_dtypes.float8_e5m2fnuz,
263264
ml_dtypes.float8_e5m2,
265+
ml_dtypes.float8_e8m0fnu,
264266
):
265267
raise TypeError(
266268
f"The numpy array dtype must be uint8 or ml_dtypes.float8* (not {array.dtype}) for IR data type {dtype}."
@@ -319,6 +321,8 @@ def _maybe_view_np_array_with_ml_dtypes(
319321
return array.view(ml_dtypes.float8_e5m2)
320322
if dtype == _enums.DataType.FLOAT8E5M2FNUZ:
321323
return array.view(ml_dtypes.float8_e5m2fnuz)
324+
if dtype == _enums.DataType.FLOAT8E8M0:
325+
return array.view(ml_dtypes.float8_e8m0fnu)
322326
if dtype == _enums.DataType.INT4:
323327
return array.view(ml_dtypes.int4)
324328
if dtype == _enums.DataType.UINT4:

src/onnx_ir/_core_test.py

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ def test_init_requires_type_when_value_is_not_np_array(self):
5252
("float8e4m3fnuz", np.uint8, ir.DataType.FLOAT8E4M3FNUZ),
5353
("float8e5m2", np.uint8, ir.DataType.FLOAT8E5M2),
5454
("float8e5m2fnuz", np.uint8, ir.DataType.FLOAT8E5M2FNUZ),
55+
("float8e8m0", np.uint8, ir.DataType.FLOAT8E8M0),
5556
("int4", np.int8, ir.DataType.INT4),
5657
("int4_uint8", np.uint8, ir.DataType.INT4),
5758
("uint4", np.uint8, ir.DataType.UINT4),
@@ -396,15 +397,28 @@ def test_external_tensor_bfloat16(self):
396397
ir.DataType.FLOAT8E5M2FNUZ,
397398
ml_dtypes.float8_e5m2fnuz,
398399
),
400+
(
401+
"FLOAT8E8M0",
402+
ir.DataType.FLOAT8E8M0,
403+
ml_dtypes.float8_e8m0fnu,
404+
),
399405
]
400406
)
401407
def test_external_tensor_float8(self, _: str, dtype: ir.DataType, np_dtype):
402-
expected_array = np.array(
403-
[[-3.0, -1.0, -0.5, -0.0, +0.0, 0.5, 1.0, 40.0, 2.0]]
404-
).astype(np_dtype)
405-
tensor_proto = ir.serde.serialize_tensor(
406-
ir.Tensor(expected_array.view(np.uint8), dtype=dtype)
407-
)
408+
# FLOAT8E8M0 has different precision characteristics (8 exponent bits, 0 mantissa bits)
409+
# It can only represent powers of 2 and special values
410+
if dtype == ir.DataType.FLOAT8E8M0:
411+
expected_array = np.array(
412+
[[0.5, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]]
413+
).astype(np_dtype)
414+
tensor_proto = ir.serde.serialize_tensor(ir.Tensor(expected_array, dtype=dtype))
415+
else:
416+
expected_array = np.array(
417+
[[-3.0, -1.0, -0.5, -0.0, +0.0, 0.5, 1.0, 40.0, 2.0]]
418+
).astype(np_dtype)
419+
tensor_proto = ir.serde.serialize_tensor(
420+
ir.Tensor(expected_array.view(np.uint8), dtype=dtype)
421+
)
408422
with tempfile.TemporaryDirectory() as temp_dir:
409423
_to_external_tensor(tensor_proto, temp_dir, "tensor.bin")
410424
tensor = ir.serde.deserialize_tensor(tensor_proto, temp_dir)

src/onnx_ir/_enums.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@ class DataType(enum.IntEnum):
6565
UINT4 = 21
6666
INT4 = 22
6767
FLOAT4E2M1 = 23
68+
FLOAT8E8M0 = 24
6869

6970
@classmethod
7071
def from_numpy(cls, dtype: np.dtype) -> DataType:
@@ -167,6 +168,7 @@ def is_floating_point(self) -> bool:
167168
DataType.FLOAT8E5M2,
168169
DataType.FLOAT8E5M2FNUZ,
169170
DataType.FLOAT4E2M1,
171+
DataType.FLOAT8E8M0,
170172
}
171173

172174
def is_integer(self) -> bool:
@@ -209,6 +211,7 @@ def is_signed(self) -> bool:
209211
DataType.FLOAT8E5M2FNUZ,
210212
DataType.INT4,
211213
DataType.FLOAT4E2M1,
214+
DataType.FLOAT8E8M0,
212215
}
213216

214217
def __repr__(self) -> str:
@@ -241,6 +244,7 @@ def __str__(self) -> str:
241244
DataType.UINT4: 4,
242245
DataType.INT4: 4,
243246
DataType.FLOAT4E2M1: 4,
247+
DataType.FLOAT8E8M0: 8,
244248
}
245249

246250

@@ -266,6 +270,7 @@ def __str__(self) -> str:
266270
np.dtype(ml_dtypes.float8_e4m3fnuz): DataType.FLOAT8E4M3FNUZ,
267271
np.dtype(ml_dtypes.float8_e5m2): DataType.FLOAT8E5M2,
268272
np.dtype(ml_dtypes.float8_e5m2fnuz): DataType.FLOAT8E5M2FNUZ,
273+
np.dtype(ml_dtypes.float8_e8m0fnu): DataType.FLOAT8E8M0,
269274
np.dtype(ml_dtypes.int4): DataType.INT4,
270275
np.dtype(ml_dtypes.uint4): DataType.UINT4,
271276
}
@@ -290,6 +295,7 @@ def __str__(self) -> str:
290295
DataType.FLOAT8E5M2: "f8e5m2",
291296
DataType.FLOAT8E4M3FNUZ: "f8e4m3fnuz",
292297
DataType.FLOAT8E5M2FNUZ: "f8e5m2fnuz",
298+
DataType.FLOAT8E8M0: "f8e8m0",
293299
DataType.FLOAT4E2M1: "f4e2m1",
294300
DataType.COMPLEX64: "c64",
295301
DataType.COMPLEX128: "c128",

src/onnx_ir/_enums_test.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ def test_enums_are_the_same_as_spec(self):
3838
self.assertEqual(_enums.DataType.INT4, onnx.TensorProto.INT4)
3939
if hasattr(onnx.TensorProto, "FLOAT4E2M1"):
4040
self.assertEqual(_enums.DataType.FLOAT4E2M1, onnx.TensorProto.FLOAT4E2M1)
41+
if hasattr(onnx.TensorProto, "FLOAT8E8M0"):
42+
self.assertEqual(_enums.DataType.FLOAT8E8M0, onnx.TensorProto.FLOAT8E8M0)
4143
self.assertEqual(_enums.DataType.UNDEFINED, onnx.TensorProto.UNDEFINED)
4244

4345
@parameterized.parameterized.expand(
@@ -73,6 +75,7 @@ def test_enums_are_the_same_as_spec(self):
7375
("uint4", np.dtype(ml_dtypes.uint4), _enums.DataType.UINT4),
7476
("int4", np.dtype(ml_dtypes.int4), _enums.DataType.INT4),
7577
("float4e2m1", np.dtype(ml_dtypes.float4_e2m1fn), _enums.DataType.FLOAT4E2M1),
78+
("float8e8m0", np.dtype(ml_dtypes.float8_e8m0fnu), _enums.DataType.FLOAT8E8M0),
7679
(
7780
"onnx_ref_bfloat16",
7881
onnx._custom_element_types.bfloat16,

src/onnx_ir/serde.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,7 @@ def numpy(self) -> np.ndarray:
405405
_enums.DataType.FLOAT8E4M3FNUZ,
406406
_enums.DataType.FLOAT8E5M2,
407407
_enums.DataType.FLOAT8E5M2FNUZ,
408+
_enums.DataType.FLOAT8E8M0,
408409
_enums.DataType.INT16,
409410
_enums.DataType.INT32,
410411
_enums.DataType.INT4,
@@ -505,6 +506,7 @@ def tobytes(self) -> bytes:
505506
_enums.DataType.FLOAT8E4M3FNUZ,
506507
_enums.DataType.FLOAT8E5M2,
507508
_enums.DataType.FLOAT8E5M2FNUZ,
509+
_enums.DataType.FLOAT8E8M0,
508510
_enums.DataType.INT4,
509511
_enums.DataType.UINT4,
510512
_enums.DataType.FLOAT4E2M1,

src/onnx_ir/serde_test.py

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -224,11 +224,34 @@ def test_tensor_proto_tensor_bfloat16(self):
224224
onnx.TensorProto.FLOAT8E5M2FNUZ,
225225
ml_dtypes.float8_e5m2fnuz,
226226
),
227+
(
228+
"FLOAT8E8M0",
229+
24, # FLOAT8E8M0 value from the enum
230+
ml_dtypes.float8_e8m0fnu,
231+
),
227232
]
228233
)
229234
def test_tensor_proto_tensor_float8(self, _: str, dtype: int, np_dtype):
230-
expected_array = np.array([[-3.0, -1.0, -0.5, -0.0, +0.0, 0.5, 1.0, 40.0, 2.0]])
231-
tensor_proto = onnx.helper.make_tensor("test_tensor", dtype, [1, 9], expected_array)
235+
# FLOAT8E8M0 has different precision characteristics (8 exponent bits, 0 mantissa bits)
236+
# It can only represent powers of 2 and special values
237+
if dtype == 24: # FLOAT8E8M0
238+
expected_array = np.array([[0.5, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]])
239+
else:
240+
expected_array = np.array([[-3.0, -1.0, -0.5, -0.0, +0.0, 0.5, 1.0, 40.0, 2.0]])
241+
242+
# Handle the case where ONNX doesn't support FLOAT8E8M0 yet (value 24)
243+
if dtype == 24: # FLOAT8E8M0
244+
# Create tensor proto manually since ONNX helper might not support this type yet
245+
tensor_proto = onnx.TensorProto()
246+
tensor_proto.name = "test_tensor"
247+
tensor_proto.data_type = dtype
248+
tensor_proto.dims[:] = [1, 9]
249+
tensor_proto.raw_data = expected_array.astype(np_dtype).tobytes()
250+
else:
251+
tensor_proto = onnx.helper.make_tensor(
252+
"test_tensor", dtype, [1, 9], expected_array
253+
)
254+
232255
tensor = serde.TensorProtoTensor(tensor_proto)
233256
np.testing.assert_array_equal(
234257
tensor.numpy().view(np_dtype).astype(np.float32), expected_array
@@ -371,6 +394,7 @@ def test_tensor_proto_tensor_empty_tensor(self):
371394
("FLOAT8E4M3FNUZ", ir.DataType.FLOAT8E4M3FNUZ),
372395
("FLOAT8E5M2", ir.DataType.FLOAT8E5M2),
373396
("FLOAT8E5M2FNUZ", ir.DataType.FLOAT8E5M2FNUZ),
397+
("FLOAT8E8M0", ir.DataType.FLOAT8E8M0),
374398
("UINT4", ir.DataType.UINT4),
375399
("INT4", ir.DataType.INT4),
376400
("FLOAT4E2M1", ir.DataType.FLOAT4E2M1),
@@ -406,6 +430,7 @@ def test_round_trip_numpy_conversion_from_raw_data(
406430
ir.DataType.FLOAT8E5M2,
407431
ir.DataType.FLOAT8E4M3FN,
408432
ir.DataType.BFLOAT16,
433+
ir.DataType.FLOAT8E8M0,
409434
}:
410435
# There is a bug in ml_dtypes that causes equality checks to fail for these dtypes
411436
# See https://github.com/jax-ml/ml_dtypes/issues/301

src/onnx_ir/tensor_adapters.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ def from_torch_dtype(dtype: torch.dtype) -> ir.DataType:
6868
torch.float8_e4m3fnuz: ir.DataType.FLOAT8E4M3FNUZ,
6969
torch.float8_e5m2: ir.DataType.FLOAT8E5M2,
7070
torch.float8_e5m2fnuz: ir.DataType.FLOAT8E5M2FNUZ,
71+
torch.float8_e8m0fnu: ir.DataType.FLOAT8E8M0,
7172
torch.int16: ir.DataType.INT16,
7273
torch.int32: ir.DataType.INT32,
7374
torch.int64: ir.DataType.INT64,
@@ -104,6 +105,7 @@ def to_torch_dtype(dtype: ir.DataType) -> torch.dtype:
104105
ir.DataType.FLOAT8E4M3FNUZ: torch.float8_e4m3fnuz,
105106
ir.DataType.FLOAT8E5M2: torch.float8_e5m2,
106107
ir.DataType.FLOAT8E5M2FNUZ: torch.float8_e5m2fnuz,
108+
ir.DataType.FLOAT8E8M0: torch.float8_e8m0fnu,
107109
ir.DataType.INT16: torch.int16,
108110
ir.DataType.INT32: torch.int32,
109111
ir.DataType.INT64: torch.int64,
@@ -142,6 +144,7 @@ def numpy(self) -> npt.NDArray:
142144
ir.DataType.FLOAT8E4M3FNUZ,
143145
ir.DataType.FLOAT8E5M2,
144146
ir.DataType.FLOAT8E5M2FNUZ,
147+
ir.DataType.FLOAT8E8M0,
145148
}:
146149
return self.raw.view(torch.uint8).numpy(force=True).view(self.dtype.numpy())
147150

src/onnx_ir/tensor_adapters_test.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ class TorchTensorTest(unittest.TestCase):
3737
(torch.float8_e4m3fnuz, ml_dtypes.float8_e4m3fnuz),
3838
(torch.float8_e5m2, ml_dtypes.float8_e5m2),
3939
(torch.float8_e5m2fnuz, ml_dtypes.float8_e5m2fnuz),
40+
(torch.float8_e8m0fnu, ml_dtypes.float8_e8m0fnu),
4041
(torch.int16, np.int16),
4142
(torch.int32, np.int32),
4243
(torch.int64, np.int64),
@@ -66,6 +67,7 @@ def test_numpy_returns_correct_dtype(self, dtype: torch.dtype, np_dtype):
6667
(torch.float8_e4m3fnuz,),
6768
(torch.float8_e5m2,),
6869
(torch.float8_e5m2fnuz,),
70+
(torch.float8_e8m0fnu,),
6971
(torch.int16,),
7072
(torch.int32,),
7173
(torch.int64,),

0 commit comments

Comments (0)