Clarify that FLOAT4E2M1 can be in int32_data (onnx#6640)

justinchuby · titaiwangms · commit 71d60659138b · 2025-01-22T23:55:50.000Z
### Description
Clarify in the spec proto files that FLOAT4E2M1 values can be stored in
`int32_data`, consistent with how the tests already use it.
Also reword the `int32_data` comment in the spec for better readability
and accuracy.

### Motivation and Context

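Previously the spec was incomplete: the tests added for FLOAT4E2M1 rely on
`int32_data`, but the spec did not list FLOAT4E2M1 as an allowed data type
for that field. FLOAT4E2M1 has not yet been released, so this change
should not require a new IR version.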

---------

Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com>
Signed-off-by: titaiwangms <titaiwang@microsoft.com>
diff --git a/onnx/onnx-ml.proto b/onnx/onnx-ml.proto
@@ -582,13 +582,19 @@ message TensorProto {
   // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
-  // float16 and float8 values must be bit-wise converted to an uint16_t prior
-  // to writing to the buffer.
-  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
-  // the 4 LSB and the second element is stored in the 4 MSB.
+  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
+  // - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
+  //   representation before being written to the buffer.
+  // - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
+  //   The first element is stored in the 4 least significant bits (LSB),
+  //   and the second element is stored in the 4 most significant bits (MSB).
+  //
+  // Consequently:
+  // - For data types with a bit-width of 8 or greater, each `int32_data` stores one element.
+  // - For 4-bit data types, each `int32_data` stores two elements.
+  //
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
+  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.
diff --git a/onnx/onnx-ml.proto3 b/onnx/onnx-ml.proto3
@@ -582,13 +582,19 @@ message TensorProto {
   // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
-  // float16 and float8 values must be bit-wise converted to an uint16_t prior
-  // to writing to the buffer.
-  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
-  // the 4 LSB and the second element is stored in the 4 MSB.
+  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
+  // - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
+  //   representation before being written to the buffer.
+  // - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
+  //   The first element is stored in the 4 least significant bits (LSB),
+  //   and the second element is stored in the 4 most significant bits (MSB).
+  //
+  // Consequently:
+  // - For data types with a bit-width of 8 or greater, each `int32_data` stores one element.
+  // - For 4-bit data types, each `int32_data` stores two elements.
+  //
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
+  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.
diff --git a/onnx/onnx.in.proto b/onnx/onnx.in.proto
@@ -579,13 +579,19 @@ message TensorProto {
   // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
-  // float16 and float8 values must be bit-wise converted to an uint16_t prior
-  // to writing to the buffer.
-  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
-  // the 4 LSB and the second element is stored in the 4 MSB.
+  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
+  // - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
+  //   representation before being written to the buffer.
+  // - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
+  //   The first element is stored in the 4 least significant bits (LSB),
+  //   and the second element is stored in the 4 most significant bits (MSB).
+  //
+  // Consequently:
+  // - For data types with a bit-width of 8 or greater, each `int32_data` stores one element.
+  // - For 4-bit data types, each `int32_data` stores two elements.
+  //
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
+  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.
diff --git a/onnx/onnx.proto b/onnx/onnx.proto
@@ -580,13 +580,19 @@ message TensorProto {
   // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
-  // float16 and float8 values must be bit-wise converted to an uint16_t prior
-  // to writing to the buffer.
-  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
-  // the 4 LSB and the second element is stored in the 4 MSB.
+  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
+  // - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
+  //   representation before being written to the buffer.
+  // - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
+  //   The first element is stored in the 4 least significant bits (LSB),
+  //   and the second element is stored in the 4 most significant bits (MSB).
+  //
+  // Consequently:
+  // - For data types with a bit-width of 8 or greater, each `int32_data` stores one element.
+  // - For 4-bit data types, each `int32_data` stores two elements.
+  //
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
+  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.
diff --git a/onnx/onnx.proto3 b/onnx/onnx.proto3
@@ -580,13 +580,19 @@ message TensorProto {
   // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
-  // float16 and float8 values must be bit-wise converted to an uint16_t prior
-  // to writing to the buffer.
-  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
-  // the 4 LSB and the second element is stored in the 4 MSB.
+  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
+  // - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
+  //   representation before being written to the buffer.
+  // - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
+  //   The first element is stored in the 4 least significant bits (LSB),
+  //   and the second element is stored in the 4 most significant bits (MSB).
+  //
+  // Consequently:
+  // - For data types with a bit-width of 8 or greater, each `int32_data` stores one element.
+  // - For 4-bit data types, each `int32_data` stores two elements.
+  //
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
+  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.