Skip to content

Commit 71d6065

Browse files
justinchuby, titaiwangms
authored and committed
Clarify that FLOAT4E2M1 can be in int32_data (onnx#6640)
### Description

Clarify in the spec proto that FLOAT4E2M1 can be stored in int32_data, consistent with test usage. Updated the spec text for int32_data for better readability and accuracy.

### Motivation and Context

Previously the spec was incomplete with respect to the added tests. FLOAT4E2M1 had not yet been released, so the change should not require a new IR version.

---------

Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com>
Signed-off-by: titaiwangms <titaiwang@microsoft.com>
1 parent 1424dd7 commit 71d6065

File tree

5 files changed

+60
-30
lines changed

5 files changed

+60
-30
lines changed

onnx/onnx-ml.proto

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -582,13 +582,19 @@ message TensorProto {
582582
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
583583
repeated float float_data = 4 [packed = true];
584584

585-
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
586-
// float16 and float8 values must be bit-wise converted to an uint16_t prior
587-
// to writing to the buffer.
588-
// uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
589-
// the 4 LSB and the second element is stored in the 4 MSB.
585+
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
586+
// - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
587+
// representation before being written to the buffer.
588+
// - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
589+
// The first element is stored in the 4 least significant bits (LSB),
590+
// and the second element is stored in the 4 most significant bits (MSB).
591+
//
592+
// Consequently:
593+
// - For data types with a bit-width of 8 or greater, each `int32_data` entry stores one element.
594+
// - For 4-bit data types, each `int32_data` entry stores two elements.
595+
//
590596
// When this field is present, the data_type field MUST be
591-
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
597+
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
592598
repeated int32 int32_data = 5 [packed = true];
593599

594600
// For strings.

onnx/onnx-ml.proto3

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -582,13 +582,19 @@ message TensorProto {
582582
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
583583
repeated float float_data = 4 [packed = true];
584584

585-
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
586-
// float16 and float8 values must be bit-wise converted to an uint16_t prior
587-
// to writing to the buffer.
588-
// uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
589-
// the 4 LSB and the second element is stored in the 4 MSB.
585+
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
586+
// - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
587+
// representation before being written to the buffer.
588+
// - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
589+
// The first element is stored in the 4 least significant bits (LSB),
590+
// and the second element is stored in the 4 most significant bits (MSB).
591+
//
592+
// Consequently:
593+
// - For data types with a bit-width of 8 or greater, each `int32_data` entry stores one element.
594+
// - For 4-bit data types, each `int32_data` entry stores two elements.
595+
//
590596
// When this field is present, the data_type field MUST be
591-
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
597+
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
592598
repeated int32 int32_data = 5 [packed = true];
593599

594600
// For strings.

onnx/onnx.in.proto

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -579,13 +579,19 @@ message TensorProto {
579579
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
580580
repeated float float_data = 4 [packed = true];
581581

582-
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
583-
// float16 and float8 values must be bit-wise converted to an uint16_t prior
584-
// to writing to the buffer.
585-
// uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
586-
// the 4 LSB and the second element is stored in the 4 MSB.
582+
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
583+
// - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
584+
// representation before being written to the buffer.
585+
// - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
586+
// The first element is stored in the 4 least significant bits (LSB),
587+
// and the second element is stored in the 4 most significant bits (MSB).
588+
//
589+
// Consequently:
590+
// - For data types with a bit-width of 8 or greater, each `int32_data` entry stores one element.
591+
// - For 4-bit data types, each `int32_data` entry stores two elements.
592+
//
587593
// When this field is present, the data_type field MUST be
588-
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
594+
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
589595
repeated int32 int32_data = 5 [packed = true];
590596

591597
// For strings.

onnx/onnx.proto

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -580,13 +580,19 @@ message TensorProto {
580580
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
581581
repeated float float_data = 4 [packed = true];
582582

583-
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
584-
// float16 and float8 values must be bit-wise converted to an uint16_t prior
585-
// to writing to the buffer.
586-
// uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
587-
// the 4 LSB and the second element is stored in the 4 MSB.
583+
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
584+
// - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
585+
// representation before being written to the buffer.
586+
// - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
587+
// The first element is stored in the 4 least significant bits (LSB),
588+
// and the second element is stored in the 4 most significant bits (MSB).
589+
//
590+
// Consequently:
591+
// - For data types with a bit-width of 8 or greater, each `int32_data` entry stores one element.
592+
// - For 4-bit data types, each `int32_data` entry stores two elements.
593+
//
588594
// When this field is present, the data_type field MUST be
589-
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
595+
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
590596
repeated int32 int32_data = 5 [packed = true];
591597

592598
// For strings.

onnx/onnx.proto3

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -580,13 +580,19 @@ message TensorProto {
580580
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
581581
repeated float float_data = 4 [packed = true];
582582

583-
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values
584-
// float16 and float8 values must be bit-wise converted to an uint16_t prior
585-
// to writing to the buffer.
586-
// uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in
587-
// the 4 LSB and the second element is stored in the 4 MSB.
583+
// For int32, uint8, int8, uint16, int16, uint4, int4, bool, (b)float16, float8, and float4:
584+
// - (b)float16 and float8 values MUST be converted bit-wise into an unsigned integer
585+
// representation before being written to the buffer.
586+
// - Each pair of uint4, int4, and float4 values MUST be packed as two 4-bit elements into a single byte.
587+
// The first element is stored in the 4 least significant bits (LSB),
588+
// and the second element is stored in the 4 most significant bits (MSB).
589+
//
590+
// Consequently:
591+
// - For data types with a bit-width of 8 or greater, each `int32_data` entry stores one element.
592+
// - For 4-bit data types, each `int32_data` entry stores two elements.
593+
//
588594
// When this field is present, the data_type field MUST be
589-
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
595+
// INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ, FLOAT4E2M1
590596
repeated int32 int32_data = 5 [packed = true];
591597

592598
// For strings.

0 commit comments

Comments
 (0)