Skip to content

Commit 760eea4

Browse files
authored
[AIX] test-suites failure fixes (#25791)
### Description This PR fixes some of the test case failures mentioned in #25790: 1. cmake/onnxruntime_unittests.cmake: fixes the onnxruntime_shared_lib_test crash. 2. include/onnxruntime/core/providers/utils/ort_graph_to_proto.h: adds a method for handling external data, which fixes EpGraphTest.SerializeToProto_ConstantOfShape, EpGraphTest.SerializeToProto_Mnist, and EpGraphTest.SerializeToProto_InputModelHasExternalIni. 3. onnxruntime/core/framework/tensorprotoutils.cc: fixes many test failures related to the float16 or Int16 types. 4. onnxruntime/test/framework/endian_test.cc: fixes ConvertRawDataInTensorProtoTest.FloatData and ConvertRawDataInTensorProtoTest.Int32Data. ### Motivation and Context To fix the AIX (big-endian) related test failures.
1 parent 2214aab commit 760eea4

File tree

5 files changed

+236
-83
lines changed

5 files changed

+236
-83
lines changed

cmake/onnxruntime_unittests.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,6 +1616,10 @@ endif()
16161616
target_compile_definitions(onnxruntime_shared_lib_test PRIVATE USE_DUMMY_EXA_DEMANGLE=1)
16171617
endif()
16181618

1619+
# AIX + GCC only: set ENABLE_EXPORTS on the onnxruntime_shared_lib_test target.
# Per the commit description, this change fixes a crash of that test on AIX.
if (CMAKE_SYSTEM_NAME MATCHES "AIX" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
1620+
set_target_properties(onnxruntime_shared_lib_test PROPERTIES ENABLE_EXPORTS 1)
1621+
endif()
1622+
16191623
if (IOS)
16201624
add_custom_command(
16211625
TARGET onnxruntime_shared_lib_test POST_BUILD

include/onnxruntime/core/providers/utils/ort_graph_to_proto.h

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,16 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
184184
Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
185185
onnx::ModelProto& model_proto,
186186
HandleInitializerDataFunc handle_initializer_data_func = nullptr);
187+
/// <summary>
188+
/// Convert the endianness of data based on the tensor element type. Mainly used on big-endian (BE) systems.
189+
/// </summary>
190+
/// <param name="value_info">OrtValueInfo for the initializer. Can be used to query name, type, shape,
191+
/// and consumer nodes.</param>
192+
/// <param name="data">Pointer to data buffer.</param>
193+
/// <param name="bytes">Length of data buffer.</param>
194+
/// <returns>An Ort::Status indicating success or an error.</returns>
195+
Ort::Status ConvertExternalData(const OrtValueInfo* value_info, void* data, size_t bytes);
196+
187197
} // namespace OrtEpUtils
188198

189199
// End of header
@@ -229,6 +239,23 @@ static Ort::Status GetOrtValueInfoTensorTypeShape(Ort::ConstValueInfo vi,
229239
/*out*/ bool& has_shape);
230240
static Ort::Status OrtValueInfoToProto(Ort::ConstValueInfo ort_value_info, onnx::ValueInfoProto& value_info_proto);
231241
static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr ort_attr, onnx::AttributeProto& attr_proto);
242+
static Ort::Status GetTensorElementSize(const ONNXTensorElementDataType& element_type, size_t& element_size);
243+
static void SwapByteOrderInplace(void* data, const size_t& data_len, const size_t& element_size);
244+
245+
// Below endian enum class is referenced from include/onnxruntime/core/framework/endian.h
246+
// Byte-order tags used to select endianness-specific code paths at compile time.
// `native` equals whichever of `little` / `big` the build targets:
//   - _WIN32 builds hard-code `native` to `little`;
//   - GCC/Clang builds take all three values from the compiler's predefined
//     __ORDER_LITTLE_ENDIAN__ / __ORDER_BIG_ENDIAN__ / __BYTE_ORDER__ macros;
//   - any other toolchain is rejected with a compile-time #error.
enum class endian {
247+
#if defined(_WIN32)
248+
little = 0,
249+
big = 1,
250+
native = little,
251+
#elif defined(__GNUC__) || defined(__clang__)
252+
little = __ORDER_LITTLE_ENDIAN__,
253+
big = __ORDER_BIG_ENDIAN__,
254+
native = __BYTE_ORDER__,
255+
#else
256+
#error onnxruntime::endian is not implemented in this environment.
257+
#endif
258+
};
232259

233260
Ort::Status OrtGraphToProto(const OrtGraph& graph,
234261
onnx::GraphProto& graph_proto,
@@ -437,7 +464,17 @@ Ort::Status OrtGraphToProto(const OrtGraph& graph,
437464
} else {
438465
// User wants to store data inline the TensorProto's raw_data
439466
tensor_proto->set_data_location(onnx::TensorProto_DataLocation_DEFAULT);
440-
tensor_proto->set_raw_data(data, data_bytes);
467+
if constexpr (endian::native == endian::big) {
468+
size_t element_size = 0;
469+
GetTensorElementSize(initializer_elem_type, element_size);
470+
// create local copy of data and do endianness conversion
471+
auto raw_data_buf = std::make_unique<unsigned char[]>(data_bytes);
472+
std::memcpy(raw_data_buf.get(), data, data_bytes);
473+
SwapByteOrderInplace(raw_data_buf.get(), data_bytes, element_size);
474+
tensor_proto->set_raw_data(raw_data_buf.get(), data_bytes);
475+
} else {
476+
tensor_proto->set_raw_data(data, data_bytes);
477+
}
441478
}
442479
}
443480
} catch (const Ort::Exception& ex) {
@@ -699,7 +736,17 @@ static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr attr, onnx::AttributeProto&
699736
const size_t data_bytes = tensor.GetTensorSizeInBytes();
700737

701738
// Copy the Ortvalue to TensorProto as raw data
702-
tensor_proto.set_raw_data(data, data_bytes);
739+
if constexpr (endian::native == endian::big) {
740+
size_t element_size = 0;
741+
GetTensorElementSize(element_type, element_size);
742+
// create local copy of data and do endianness conversion
743+
auto raw_data_buf = std::make_unique<unsigned char[]>(data_bytes);
744+
std::memcpy(raw_data_buf.get(), data, data_bytes);
745+
SwapByteOrderInplace(raw_data_buf.get(), data_bytes, element_size);
746+
tensor_proto.set_raw_data(raw_data_buf.get(), data_bytes);
747+
} else {
748+
tensor_proto.set_raw_data(data, data_bytes);
749+
}
703750

704751
*(attr_proto.mutable_t()) = std::move(tensor_proto);
705752
break;
@@ -718,5 +765,75 @@ static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr attr, onnx::AttributeProto&
718765
return Ort::Status{nullptr};
719766
}
720767

768+
// Reverses, in place, the byte order of every element stored in `data`.
//
// The element width is derived from the tensor element type reported by
// `value_info`. On little-endian builds (and on Windows, which is treated as
// little-endian) the buffer is left untouched and success is returned.
//
// Parameters:
//   value_info - describes the initializer whose bytes are held in `data`.
//   data       - mutable buffer holding the initializer's elements.
//   bytes      - length of `data` in bytes.
//
// Returns success, or the error produced while querying the element type or
// while mapping it to an element size (e.g. an unsupported element type).
Ort::Status ConvertExternalData(const OrtValueInfo* value_info, void* data, size_t bytes) {
#if !defined(_WIN32)
  if constexpr (endian::native == endian::little) {
    return Ort::Status{nullptr};
  }

  std::vector<int64_t> initializer_dims;
  std::vector<std::string> initializer_sym_dims;
  ONNXTensorElementDataType initializer_elem_type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
  size_t element_size = 0;
  Ort::ConstValueInfo ort_value_info{value_info};
  bool has_shape{false};
  ORT_EP_UTILS_CXX_RETURN_IF_ERROR(GetOrtValueInfoTensorTypeShape(ort_value_info, false,
                                                                  initializer_elem_type, initializer_dims,
                                                                  initializer_sym_dims, has_shape));

  // Propagate a failed size lookup instead of continuing with element_size == 0,
  // which would make the swap routine divide by zero.
  ORT_EP_UTILS_CXX_RETURN_IF_ERROR(GetTensorElementSize(initializer_elem_type, element_size));

  // Single-byte elements have no byte order to convert.
  if (element_size > 1) {
    SwapByteOrderInplace(data, bytes, element_size);
  }
#else
  // Nothing to convert on Windows; mark the parameters as intentionally unused.
  static_cast<void>(value_info);
  static_cast<void>(data);
  static_cast<void>(bytes);
#endif
  return Ort::Status{nullptr};
}
793+
794+
// Maps an ONNX tensor element type to the size, in bytes, of one stored element.
//
// Parameters:
//   element_type - the tensor element type to look up.
//   element_size - receives the element size on success; left unmodified on failure.
//
// Returns success for the element types listed below, otherwise an ORT_FAIL
// status naming the unexpected enum value.
static Ort::Status GetTensorElementSize(const ONNXTensorElementDataType& element_type, size_t& element_size) {
  switch (element_type) {
    // Types stored one byte per element (4-bit types are reported as one byte too).
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4:
      element_size = sizeof(uint8_t);
      break;

    // 16-bit integer and floating-point types.
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
      element_size = sizeof(uint16_t);
      break;

    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
      element_size = sizeof(float);
      break;

    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
      element_size = sizeof(uint32_t);
      break;

    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
      element_size = sizeof(uint64_t);
      break;

    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
      element_size = sizeof(double);
      break;

    default: {
      // Any type not listed above has no known element size here.
      const std::string err_msg =
          "Unexpected ONNXTensorElementDataType with value " + std::to_string(static_cast<int>(element_type));
      return Ort::Status(err_msg.c_str(), ORT_FAIL);
    }
  }

  return Ort::Status{nullptr};
}
825+
826+
// Reverses the byte order of each `element_size`-byte element in `data`, in place.
//
// Parameters:
//   data         - buffer to modify; a null pointer is treated as a no-op.
//   data_len     - length of the buffer in bytes.
//   element_size - width of one element in bytes.
//
// Only whole elements are processed: if `data_len` is not a multiple of
// `element_size`, the trailing partial element is left untouched.
// An `element_size` of 0 or 1 is a no-op (0 would otherwise divide by zero,
// and single-byte elements have no byte order to reverse).
static void SwapByteOrderInplace(void* data, const size_t& data_len, const size_t& element_size) {
  if (data == nullptr || element_size < 2) {
    return;
  }

  char* const bytes = static_cast<char*>(data);
  const size_t num_elements = data_len / element_size;

  for (size_t i = 0; i < num_elements; ++i) {
    // Walk inward from both ends of the current element, swapping as we go.
    char* front = bytes + i * element_size;
    char* back = front + element_size - 1;
    while (front < back) {
      std::swap(*front, *back);
      ++front;
      --back;
    }
  }
}
837+
721838
} // namespace OrtEpUtils
722839
#endif // ORT_EP_UTILS_ORT_GRAPH_TO_PROTO_IMPL

onnxruntime/core/framework/tensorprotoutils.cc

Lines changed: 82 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -351,66 +351,90 @@ void ConvertRawDataInTensorProto(TensorProto& tensor) {
351351
void* bytes = NULL;
352352
size_t num_elements = 0;
353353

354-
switch (tensor.data_type()) {
355-
case TensorProto_DataType_FLOAT:
356-
bytes = tensor.mutable_float_data()->mutable_data();
357-
num_elements = tensor.float_data_size();
358-
element_size = sizeof(float);
359-
break;
360-
361-
case TensorProto_DataType_UINT8:
362-
case TensorProto_DataType_INT8:
363-
bytes = tensor.mutable_int32_data()->mutable_data();
364-
num_elements = tensor.int32_data_size();
365-
element_size = sizeof(uint8_t);
366-
break;
367-
368-
case TensorProto_DataType_UINT16:
369-
case TensorProto_DataType_INT16:
370-
case TensorProto_DataType_FLOAT16:
371-
case TensorProto_DataType_BFLOAT16:
372-
case TensorProto_DataType_INT32:
373-
bytes = tensor.mutable_int32_data()->mutable_data();
374-
num_elements = tensor.int32_data_size();
375-
// We are setting this to int32_t size because we need to swap all 4 bytes
376-
// to represent 16 bits within 32 bits correctly on a LE/BE system.
377-
element_size = sizeof(int32_t);
378-
break;
379-
380-
// uint32_t is stored in uint64_t
381-
case TensorProto_DataType_UINT32:
382-
case TensorProto_DataType_UINT64:
383-
bytes = tensor.mutable_uint64_data()->mutable_data();
384-
num_elements = tensor.uint64_data_size();
385-
element_size = sizeof(uint64_t);
386-
break;
387-
388-
case TensorProto_DataType_INT64:
389-
bytes = tensor.mutable_int64_data()->mutable_data();
390-
num_elements = tensor.int64_data_size();
391-
element_size = sizeof(int64_t);
392-
break;
393-
394-
case TensorProto_DataType_DOUBLE:
395-
bytes = tensor.mutable_double_data()->mutable_data();
396-
num_elements = tensor.double_data_size();
397-
element_size = sizeof(double);
398-
break;
399-
400-
case TensorProto_DataType_COMPLEX64:
401-
bytes = tensor.mutable_float_data()->mutable_data();
402-
num_elements = tensor.float_data_size();
403-
element_size = sizeof(float);
404-
break;
405-
}
406-
407-
if (element_size == 1) {
408-
return;
409-
}
410-
411-
if (tensor.has_raw_data()) {
354+
// For some data_type, element size differs for raw data vs
355+
// data set using the add_<data_type>data() API
356+
if (HasRawData(tensor)) {
357+
static std::unordered_map<size_t, size_t> tensorproto_data_size{
358+
{TensorProto_DataType_FLOAT, sizeof(float)},
359+
{TensorProto_DataType_UINT8, sizeof(uint8_t)},
360+
{TensorProto_DataType_INT8, sizeof(int8_t)},
361+
{TensorProto_DataType_UINT16, sizeof(uint16_t)},
362+
{TensorProto_DataType_INT16, sizeof(int16_t)},
363+
{TensorProto_DataType_FLOAT16, sizeof(uint16_t)},
364+
{TensorProto_DataType_BFLOAT16, sizeof(uint16_t)},
365+
{TensorProto_DataType_INT32, sizeof(int32_t)},
366+
{TensorProto_DataType_UINT32, sizeof(uint32_t)},
367+
{TensorProto_DataType_UINT64, sizeof(uint64_t)},
368+
{TensorProto_DataType_INT64, sizeof(int64_t)},
369+
{TensorProto_DataType_DOUBLE, sizeof(double)},
370+
{TensorProto_DataType_BOOL, sizeof(uint8_t)},
371+
{TensorProto_DataType_FLOAT8E4M3FN, sizeof(uint8_t)},
372+
{TensorProto_DataType_FLOAT8E4M3FNUZ, sizeof(uint8_t)},
373+
{TensorProto_DataType_FLOAT8E5M2, sizeof(uint8_t)},
374+
{TensorProto_DataType_FLOAT8E5M2FNUZ, sizeof(uint8_t)},
375+
{TensorProto_DataType_UINT4, sizeof(uint8_t)},
376+
{TensorProto_DataType_INT4, sizeof(uint8_t)},
377+
};
378+
auto pos = tensorproto_data_size.find(tensor.data_type());
379+
if (pos == tensorproto_data_size.end()) {
380+
return;
381+
}
382+
element_size = pos->second;
383+
if (element_size == 1) {
384+
return;
385+
}
412386
num_elements = tensor.raw_data().size() / element_size;
413387
bytes = tensor.mutable_raw_data()->data();
388+
} else { // HasRawData(tensor)
389+
390+
switch (tensor.data_type()) {
391+
case TensorProto_DataType_FLOAT:
392+
bytes = tensor.mutable_float_data()->mutable_data();
393+
num_elements = tensor.float_data_size();
394+
element_size = sizeof(float);
395+
break;
396+
397+
case TensorProto_DataType_BOOL:
398+
case TensorProto_DataType_UINT4:
399+
case TensorProto_DataType_INT4:
400+
case TensorProto_DataType_UINT8:
401+
case TensorProto_DataType_INT8:
402+
case TensorProto_DataType_UINT16:
403+
case TensorProto_DataType_INT16:
404+
case TensorProto_DataType_FLOAT16:
405+
case TensorProto_DataType_BFLOAT16:
406+
case TensorProto_DataType_FLOAT8E4M3FN:
407+
case TensorProto_DataType_FLOAT8E4M3FNUZ:
408+
case TensorProto_DataType_FLOAT8E5M2:
409+
case TensorProto_DataType_FLOAT8E5M2FNUZ:
410+
case TensorProto_DataType_INT32:
411+
bytes = tensor.mutable_int32_data()->mutable_data();
412+
num_elements = tensor.int32_data_size();
413+
// We are setting this to int32_t size because we need to swap all 4 bytes
414+
// to represent 16 bits within 32 bits correctly on a LE/BE system.
415+
element_size = sizeof(int32_t);
416+
break;
417+
418+
// uint32_t is stored in uint64_t
419+
case TensorProto_DataType_UINT32:
420+
case TensorProto_DataType_UINT64:
421+
bytes = tensor.mutable_uint64_data()->mutable_data();
422+
num_elements = tensor.uint64_data_size();
423+
element_size = sizeof(uint64_t);
424+
break;
425+
426+
case TensorProto_DataType_INT64:
427+
bytes = tensor.mutable_int64_data()->mutable_data();
428+
num_elements = tensor.int64_data_size();
429+
element_size = sizeof(int64_t);
430+
break;
431+
432+
case TensorProto_DataType_DOUBLE:
433+
bytes = tensor.mutable_double_data()->mutable_data();
434+
num_elements = tensor.double_data_size();
435+
element_size = sizeof(double);
436+
break;
437+
}
414438
}
415439

416440
gsl::span<std::byte> span = gsl::make_span(reinterpret_cast<std::byte*>(bytes), num_elements * element_size);

onnxruntime/test/ep_graph/test_ep_graph.cc

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ TEST(EpGraphTest, SerializeToProto_InputModelHasExternalIni) {
209209
std::string ext_ini_file_path = "conv_qdq_ext_ini_serialized.bin";
210210
std::filesystem::remove(ext_ini_file_path);
211211
std::ofstream ext_ini_ofs(ext_ini_file_path, std::ios::binary);
212-
auto handle_initializer_data = [&ext_ini_ofs, &ext_ini_file_path](const OrtValueInfo* /* value_info */,
212+
auto handle_initializer_data = [&ext_ini_ofs, &ext_ini_file_path](const OrtValueInfo* value_info,
213213
const void* data, size_t bytes,
214214
bool& is_external, std::string& location,
215215
int64_t& offset) -> Ort::Status {
@@ -218,9 +218,19 @@ TEST(EpGraphTest, SerializeToProto_InputModelHasExternalIni) {
218218
return Ort::Status{nullptr};
219219
}
220220

221-
offset = ext_ini_ofs.tellp();
221+
// On big-endian (BE) systems, the data must be converted before it is written to the file.
222+
if constexpr (endian::native != endian::little) {
223+
auto data_buf = std::make_unique<char[]>(bytes);
224+
std::memcpy(data_buf.get(), data, bytes);
225+
OrtEpUtils::ConvertExternalData(value_info, data_buf.get(), bytes);
226+
offset = ext_ini_ofs.tellp();
227+
ext_ini_ofs.write(static_cast<const char*>(data_buf.get()), bytes);
228+
} else {
229+
offset = ext_ini_ofs.tellp();
230+
ext_ini_ofs.write(static_cast<const char*>(data), bytes);
231+
}
232+
222233
location = ext_ini_file_path;
223-
ext_ini_ofs.write(static_cast<const char*>(data), bytes);
224234
ext_ini_ofs.flush();
225235
is_external = true; // True if is external initializer.
226236

@@ -337,15 +347,24 @@ TEST(EpGraphTest, SerializeToProto_Mnist) {
337347
// OrtValueInfo* could be used to query initializer's name, type, shape,
338348
// node consumers, etc.
339349
(void)value_info;
340-
341350
if (bytes <= 127) {
342351
is_external = false; // Keep small initializers stored inside the TensorProto.
343352
return Ort::Status{nullptr};
344353
}
345354

346-
offset = ext_ini_ofs.tellp();
355+
// On big-endian (BE) systems, the data must be converted before it is written to the file.
356+
if constexpr (endian::native != endian::little) {
357+
auto data_buf = std::make_unique<char[]>(bytes);
358+
std::memcpy(data_buf.get(), data, bytes);
359+
OrtEpUtils::ConvertExternalData(value_info, data_buf.get(), bytes);
360+
offset = ext_ini_ofs.tellp();
361+
ext_ini_ofs.write(static_cast<const char*>(data_buf.get()), bytes);
362+
} else {
363+
offset = ext_ini_ofs.tellp();
364+
ext_ini_ofs.write(static_cast<const char*>(data), bytes);
365+
}
366+
347367
location = ext_ini_file_path;
348-
ext_ini_ofs.write(static_cast<const char*>(data), bytes);
349368
ext_ini_ofs.flush();
350369
is_external = true; // True if is external initializer.
351370

0 commit comments

Comments
 (0)