[AIX] test-suites failure fixes (#25791)

ranjitshs · web-flow · commit 760eea48e187 · 2025-11-06T15:16:10.000+05:30
### Description This PR fixes some of the test case failures mentioned in #25790. 1. cmake/onnxruntime_unittests.cmake: fixes the onnxruntime_shared_lib_test crash. 2. include/onnxruntime/core/providers/utils/ort_graph_to_proto.h: adds a method for handling external data, which fixes EpGraphTest.SerializeToProto_ConstantOfShape, EpGraphTest.SerializeToProto_Mnist, and EpGraphTest.SerializeToProto_InputModelHasExternalIni. 3. onnxruntime/core/framework/tensorprotoutils.cc: fixes many test failures related to the float16 or int16 types. 4. onnxruntime/test/framework/endian_test.cc: fixes ConvertRawDataInTensorProtoTest.FloatData and ConvertRawDataInTensorProtoTest.Int32Data. ### Motivation and Context Fixes the AIX (big-endian) related test failures.
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -1616,6 +1616,10 @@ endif()
       target_compile_definitions(onnxruntime_shared_lib_test PRIVATE USE_DUMMY_EXA_DEMANGLE=1)
     endif()
 
+    if (CMAKE_SYSTEM_NAME MATCHES "AIX" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+      set_target_properties(onnxruntime_shared_lib_test PROPERTIES ENABLE_EXPORTS 1)
+    endif()
+
     if (IOS)
       add_custom_command(
         TARGET onnxruntime_shared_lib_test POST_BUILD
diff --git a/include/onnxruntime/core/providers/utils/ort_graph_to_proto.h b/include/onnxruntime/core/providers/utils/ort_graph_to_proto.h
@@ -184,6 +184,16 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
 Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
                             onnx::ModelProto& model_proto,
                             HandleInitializerDataFunc handle_initializer_data_func = nullptr);
+/// <summary>
+/// Converts the endianness of data based on the tensor element type. Mainly used on big-endian (BE) systems.
+/// </summary>
+/// <param name="value_info">OrtValueInfo for the initializer. Can be used to query name, type, shape,
+///                           and consumer nodes.</param>
+/// <param name="data">Pointer to data buffer.</param>
+/// <param name="bytes">Length of data buffer.</param>
+/// <returns>An Ort::Status indicating success or an error.</returns>
+Ort::Status ConvertExternalData(const OrtValueInfo* value_info, void* data, size_t bytes);
+
 }  // namespace OrtEpUtils
 
 // End of header
@@ -229,6 +239,23 @@ static Ort::Status GetOrtValueInfoTensorTypeShape(Ort::ConstValueInfo vi,
                                                   /*out*/ bool& has_shape);
 static Ort::Status OrtValueInfoToProto(Ort::ConstValueInfo ort_value_info, onnx::ValueInfoProto& value_info_proto);
 static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr ort_attr, onnx::AttributeProto& attr_proto);
+static Ort::Status GetTensorElementSize(const ONNXTensorElementDataType& element_type, size_t& element_size);
+static void SwapByteOrderInplace(void* data, const size_t& data_len, const size_t& element_size);
+
+// The endian enum class below is based on the one in include/onnxruntime/core/framework/endian.h
+enum class endian {
+#if defined(_WIN32)
+  little = 0,
+  big = 1,
+  native = little,  // NOTE(review): assumes every Windows target is little-endian -- confirm.
+#elif defined(__GNUC__) || defined(__clang__)
+  little = __ORDER_LITTLE_ENDIAN__,
+  big = __ORDER_BIG_ENDIAN__,
+  native = __BYTE_ORDER__,  // Taken from the compiler's predefined byte-order macro.
+#else
+#error onnxruntime::endian is not implemented in this environment.
+#endif
+};
 
 Ort::Status OrtGraphToProto(const OrtGraph& graph,
                             onnx::GraphProto& graph_proto,
@@ -437,7 +464,17 @@ Ort::Status OrtGraphToProto(const OrtGraph& graph,
       } else {
         // User wants to store data inline the TensorProto's raw_data
         tensor_proto->set_data_location(onnx::TensorProto_DataLocation_DEFAULT);
-        tensor_proto->set_raw_data(data, data_bytes);
+        if constexpr (endian::native == endian::big) {
+          size_t element_size = 0;
+          GetTensorElementSize(initializer_elem_type, element_size);
+          // Create a local copy of the data and do the endianness conversion on the copy.
+          auto raw_data_buf = std::make_unique<unsigned char[]>(data_bytes);
+          std::memcpy(raw_data_buf.get(), data, data_bytes);
+          SwapByteOrderInplace(raw_data_buf.get(), data_bytes, element_size);
+          tensor_proto->set_raw_data(raw_data_buf.get(), data_bytes);
+        } else {
+          tensor_proto->set_raw_data(data, data_bytes);
+        }
       }
     }
   } catch (const Ort::Exception& ex) {
@@ -699,7 +736,17 @@ static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr attr, onnx::AttributeProto&
         const size_t data_bytes = tensor.GetTensorSizeInBytes();
 
         // Copy the Ortvalue to TensorProto as raw data
-        tensor_proto.set_raw_data(data, data_bytes);
+        if constexpr (endian::native == endian::big) {
+          size_t element_size = 0;
+          GetTensorElementSize(element_type, element_size);
+          // Create a local copy of the data and do the endianness conversion on the copy.
+          auto raw_data_buf = std::make_unique<unsigned char[]>(data_bytes);
+          std::memcpy(raw_data_buf.get(), data, data_bytes);
+          SwapByteOrderInplace(raw_data_buf.get(), data_bytes, element_size);
+          tensor_proto.set_raw_data(raw_data_buf.get(), data_bytes);
+        } else {
+          tensor_proto.set_raw_data(data, data_bytes);
+        }
 
         *(attr_proto.mutable_t()) = std::move(tensor_proto);
         break;
@@ -718,5 +765,75 @@ static Ort::Status OrtOpAttrToProto(Ort::ConstOpAttr attr, onnx::AttributeProto&
   return Ort::Status{nullptr};
 }
 
+// Reverses the byte order of every element of an initializer's data buffer, in place.
+// On builds where endian::native is little (and on all _WIN32 builds) this is a no-op.
+//   value_info: OrtValueInfo of the initializer; used to query the tensor element type.
+//   data:       pointer to the (mutable) data buffer to convert.
+//   bytes:      length of the data buffer in bytes.
+// Returns a success status, or the error reported while querying the element type or
+// while looking up the element size (e.g. for an element type without a known size).
+Ort::Status ConvertExternalData(const OrtValueInfo* value_info, void* data, size_t bytes) {
+#if !defined(_WIN32)
+  if constexpr (endian::native == endian::little) {
+    return Ort::Status{nullptr};
+  }
+  std::vector<int64_t> initializer_dims;
+  std::vector<std::string> initializer_sym_dims;
+  ONNXTensorElementDataType initializer_elem_type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+  size_t element_size = 0;
+  Ort::ConstValueInfo ort_value_info{value_info};
+  bool has_shape{false};
+  ORT_EP_UTILS_CXX_RETURN_IF_ERROR(GetOrtValueInfoTensorTypeShape(ort_value_info, false,
+                                                                  initializer_elem_type, initializer_dims,
+                                                                  initializer_sym_dims, has_shape));
+  // Propagate a failed lookup instead of continuing with element_size == 0, which would
+  // otherwise be used as a divisor when computing the number of elements to swap.
+  ORT_EP_UTILS_CXX_RETURN_IF_ERROR(GetTensorElementSize(initializer_elem_type, element_size));
+  // Single-byte elements have no byte order to convert.
+  if (element_size > 1) {
+    SwapByteOrderInplace(data, bytes, element_size);
+  }
+#else
+  // Nothing to convert on this platform; mark the parameters as intentionally unused.
+  static_cast<void>(value_info);
+  static_cast<void>(data);
+  static_cast<void>(bytes);
+#endif
+  return Ort::Status{nullptr};
+}
+
+// Looks up the size, in bytes, of one element of the given tensor element type.
+//   element_type: tensor element type to look up.
+//   element_size: receives the element size on success; left unmodified on failure.
+// Returns a success status, or an ORT_FAIL status naming the numeric value of the
+// element type when it is not one of the types handled below.
+static Ort::Status GetTensorElementSize(const ONNXTensorElementDataType& element_type, size_t& element_size) {
+  switch (element_type) {
+    // Types stored in a single byte.
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4:
+      element_size = sizeof(uint8_t);
+      break;
+
+    // Types stored in two bytes.
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
+      element_size = sizeof(uint16_t);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
+      element_size = sizeof(float);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
+      element_size = sizeof(int32_t);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
+      element_size = sizeof(uint32_t);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
+      element_size = sizeof(int64_t);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
+      element_size = sizeof(uint64_t);
+      break;
+
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
+      element_size = sizeof(double);
+      break;
+
+    default: {
+      // Any other element type has no entry here and is reported as an error.
+      const std::string message =
+          "Unexpected ONNXTensorElementDataType with value " + std::to_string(static_cast<int>(element_type));
+      return Ort::Status(message.c_str(), ORT_FAIL);
+    }
+  }
+  return Ort::Status{nullptr};
+}
+
+// Reverses the byte order of every element in a buffer, in place.
+//   data:         pointer to the start of the buffer; treated as raw bytes.
+//   data_len:     length of the buffer in bytes.
+//   element_size: size of one element in bytes.
+// Only data_len / element_size whole elements are processed; trailing bytes are left untouched.
+// A null buffer or an element size below 2 leaves the buffer unchanged: such elements have no
+// byte order to reverse, and rejecting 0 avoids a division by zero when the caller could not
+// determine the element size.
+static void SwapByteOrderInplace(void* data, const size_t& data_len, const size_t& element_size) {
+  if (data == nullptr || element_size < 2) {
+    return;
+  }
+  char* bytes = static_cast<char*>(data);
+  const size_t num_elements = data_len / element_size;
+  for (size_t i = 0; i < num_elements; ++i) {
+    char* start_byte = bytes + i * element_size;
+    char* end_byte = start_byte + element_size - 1;
+    // Swap pairwise from the outside in; the middle byte of an odd-sized element stays in place.
+    for (size_t count = 0; count < element_size / 2; ++count) {
+      std::swap(*start_byte++, *end_byte--);
+    }
+  }
+}
+
 }  // namespace OrtEpUtils
 #endif  // ORT_EP_UTILS_ORT_GRAPH_TO_PROTO_IMPL
diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -351,66 +351,90 @@ void ConvertRawDataInTensorProto(TensorProto& tensor) {
   void* bytes = NULL;
   size_t num_elements = 0;
 
-  switch (tensor.data_type()) {
-    case TensorProto_DataType_FLOAT:
-      bytes = tensor.mutable_float_data()->mutable_data();
-      num_elements = tensor.float_data_size();
-      element_size = sizeof(float);
-      break;
-
-    case TensorProto_DataType_UINT8:
-    case TensorProto_DataType_INT8:
-      bytes = tensor.mutable_int32_data()->mutable_data();
-      num_elements = tensor.int32_data_size();
-      element_size = sizeof(uint8_t);
-      break;
-
-    case TensorProto_DataType_UINT16:
-    case TensorProto_DataType_INT16:
-    case TensorProto_DataType_FLOAT16:
-    case TensorProto_DataType_BFLOAT16:
-    case TensorProto_DataType_INT32:
-      bytes = tensor.mutable_int32_data()->mutable_data();
-      num_elements = tensor.int32_data_size();
-      // We are setting this to int32_t size because we need to swap all 4 bytes
-      // to represent 16 bits within 32 bits correctly on a LE/BE system.
-      element_size = sizeof(int32_t);
-      break;
-
-    // uint32_t is stored in uint64_t
-    case TensorProto_DataType_UINT32:
-    case TensorProto_DataType_UINT64:
-      bytes = tensor.mutable_uint64_data()->mutable_data();
-      num_elements = tensor.uint64_data_size();
-      element_size = sizeof(uint64_t);
-      break;
-
-    case TensorProto_DataType_INT64:
-      bytes = tensor.mutable_int64_data()->mutable_data();
-      num_elements = tensor.int64_data_size();
-      element_size = sizeof(int64_t);
-      break;
-
-    case TensorProto_DataType_DOUBLE:
-      bytes = tensor.mutable_double_data()->mutable_data();
-      num_elements = tensor.double_data_size();
-      element_size = sizeof(double);
-      break;
-
-    case TensorProto_DataType_COMPLEX64:
-      bytes = tensor.mutable_float_data()->mutable_data();
-      num_elements = tensor.float_data_size();
-      element_size = sizeof(float);
-      break;
-  }
-
-  if (element_size == 1) {
-    return;
-  }
-
-  if (tensor.has_raw_data()) {
+  // For some data_type, element size differs for raw data vs
+  // data set using the add_<data_type>data() API
+  if (HasRawData(tensor)) {
+    static std::unordered_map<size_t, size_t> tensorproto_data_size{
+        {TensorProto_DataType_FLOAT, sizeof(float)},
+        {TensorProto_DataType_UINT8, sizeof(uint8_t)},
+        {TensorProto_DataType_INT8, sizeof(int8_t)},
+        {TensorProto_DataType_UINT16, sizeof(uint16_t)},
+        {TensorProto_DataType_INT16, sizeof(int16_t)},
+        {TensorProto_DataType_FLOAT16, sizeof(uint16_t)},
+        {TensorProto_DataType_BFLOAT16, sizeof(uint16_t)},
+        {TensorProto_DataType_INT32, sizeof(int32_t)},
+        {TensorProto_DataType_UINT32, sizeof(uint32_t)},
+        {TensorProto_DataType_UINT64, sizeof(uint64_t)},
+        {TensorProto_DataType_INT64, sizeof(int64_t)},
+        {TensorProto_DataType_DOUBLE, sizeof(double)},
+        {TensorProto_DataType_BOOL, sizeof(uint8_t)},
+        {TensorProto_DataType_FLOAT8E4M3FN, sizeof(uint8_t)},
+        {TensorProto_DataType_FLOAT8E4M3FNUZ, sizeof(uint8_t)},
+        {TensorProto_DataType_FLOAT8E5M2, sizeof(uint8_t)},
+        {TensorProto_DataType_FLOAT8E5M2FNUZ, sizeof(uint8_t)},
+        {TensorProto_DataType_UINT4, sizeof(uint8_t)},
+        {TensorProto_DataType_INT4, sizeof(uint8_t)},
+    };
+    auto pos = tensorproto_data_size.find(tensor.data_type());
+    if (pos == tensorproto_data_size.end()) {
+      return;
+    }
+    element_size = pos->second;
+    if (element_size == 1) {
+      return;
+    }
     num_elements = tensor.raw_data().size() / element_size;
     bytes = tensor.mutable_raw_data()->data();
+  } else {  // !HasRawData(tensor)
+
+    switch (tensor.data_type()) {
+      case TensorProto_DataType_FLOAT:
+        bytes = tensor.mutable_float_data()->mutable_data();
+        num_elements = tensor.float_data_size();
+        element_size = sizeof(float);
+        break;
+
+      case TensorProto_DataType_BOOL:
+      case TensorProto_DataType_UINT4:
+      case TensorProto_DataType_INT4:
+      case TensorProto_DataType_UINT8:
+      case TensorProto_DataType_INT8:
+      case TensorProto_DataType_UINT16:
+      case TensorProto_DataType_INT16:
+      case TensorProto_DataType_FLOAT16:
+      case TensorProto_DataType_BFLOAT16:
+      case TensorProto_DataType_FLOAT8E4M3FN:
+      case TensorProto_DataType_FLOAT8E4M3FNUZ:
+      case TensorProto_DataType_FLOAT8E5M2:
+      case TensorProto_DataType_FLOAT8E5M2FNUZ:
+      case TensorProto_DataType_INT32:
+        bytes = tensor.mutable_int32_data()->mutable_data();
+        num_elements = tensor.int32_data_size();
+        // We are setting this to int32_t size because we need to swap all 4 bytes
+        // to represent 16 bits within 32 bits correctly on a LE/BE system.
+        element_size = sizeof(int32_t);
+        break;
+
+      // uint32_t is stored in uint64_t
+      case TensorProto_DataType_UINT32:
+      case TensorProto_DataType_UINT64:
+        bytes = tensor.mutable_uint64_data()->mutable_data();
+        num_elements = tensor.uint64_data_size();
+        element_size = sizeof(uint64_t);
+        break;
+
+      case TensorProto_DataType_INT64:
+        bytes = tensor.mutable_int64_data()->mutable_data();
+        num_elements = tensor.int64_data_size();
+        element_size = sizeof(int64_t);
+        break;
+
+      case TensorProto_DataType_DOUBLE:
+        bytes = tensor.mutable_double_data()->mutable_data();
+        num_elements = tensor.double_data_size();
+        element_size = sizeof(double);
+        break;
+    }
   }
 
   gsl::span<std::byte> span = gsl::make_span(reinterpret_cast<std::byte*>(bytes), num_elements * element_size);
diff --git a/onnxruntime/test/ep_graph/test_ep_graph.cc b/onnxruntime/test/ep_graph/test_ep_graph.cc
@@ -209,7 +209,7 @@ TEST(EpGraphTest, SerializeToProto_InputModelHasExternalIni) {
     std::string ext_ini_file_path = "conv_qdq_ext_ini_serialized.bin";
     std::filesystem::remove(ext_ini_file_path);
     std::ofstream ext_ini_ofs(ext_ini_file_path, std::ios::binary);
-    auto handle_initializer_data = [&ext_ini_ofs, &ext_ini_file_path](const OrtValueInfo* /* value_info */,
+    auto handle_initializer_data = [&ext_ini_ofs, &ext_ini_file_path](const OrtValueInfo* value_info,
                                                                       const void* data, size_t bytes,
                                                                       bool& is_external, std::string& location,
                                                                       int64_t& offset) -> Ort::Status {
@@ -218,9 +218,19 @@ TEST(EpGraphTest, SerializeToProto_InputModelHasExternalIni) {
         return Ort::Status{nullptr};
       }
 
-      offset = ext_ini_ofs.tellp();
+      // On BE systems, the data must be converted before it is written to the file.
+      if constexpr (endian::native != endian::little) {
+        auto data_buf = std::make_unique<char[]>(bytes);
+        std::memcpy(data_buf.get(), data, bytes);
+        OrtEpUtils::ConvertExternalData(value_info, data_buf.get(), bytes);
+        offset = ext_ini_ofs.tellp();
+        ext_ini_ofs.write(static_cast<const char*>(data_buf.get()), bytes);
+      } else {
+        offset = ext_ini_ofs.tellp();
+        ext_ini_ofs.write(static_cast<const char*>(data), bytes);
+      }
+
       location = ext_ini_file_path;
-      ext_ini_ofs.write(static_cast<const char*>(data), bytes);
       ext_ini_ofs.flush();
       is_external = true;  // True if is external initializer.
 
@@ -337,15 +347,24 @@ TEST(EpGraphTest, SerializeToProto_Mnist) {
       // OrtValueInfo* could be used to query initializer's name, type, shape,
       // node consumers, etc.
       (void)value_info;
-
       if (bytes <= 127) {
         is_external = false;  // Keep small initializers stored inside the TensorProto.
         return Ort::Status{nullptr};
       }
 
-      offset = ext_ini_ofs.tellp();
+      // On BE systems, the data must be converted before it is written to the file.
+      if constexpr (endian::native != endian::little) {
+        auto data_buf = std::make_unique<char[]>(bytes);
+        std::memcpy(data_buf.get(), data, bytes);
+        OrtEpUtils::ConvertExternalData(value_info, data_buf.get(), bytes);
+        offset = ext_ini_ofs.tellp();
+        ext_ini_ofs.write(static_cast<const char*>(data_buf.get()), bytes);
+      } else {
+        offset = ext_ini_ofs.tellp();
+        ext_ini_ofs.write(static_cast<const char*>(data), bytes);
+      }
+
       location = ext_ini_file_path;
-      ext_ini_ofs.write(static_cast<const char*>(data), bytes);
       ext_ini_ofs.flush();
       is_external = true;  // True if is external initializer.
 
diff --git a/onnxruntime/test/framework/endian_test.cc b/onnxruntime/test/framework/endian_test.cc