Fix misaligned addresses while reading tensor attributes from raw data buffers (#27312)

n-v-k · web-flow · commit 0df5dbc7c4d3 · 2026-02-15T19:53:34.000Z
### Description Explicitly copy tensor attribute values from raw data buffers instead of directly using pointers without checking proper memory alignment. ### Motivation and Context Fixes #27311 With some (rare) models ONNXRuntime built for Android armeabi-v7a 32 bit architecture crashes with bus error. Raw data buffers for tensor attributes of type `char*` has no proper alignment guarantees while reading values from them. Actually, it is in general (not only Android armeabi-v7a) UB to access objects at a misaligned address in C++, though many modern platforms allow it.
diff --git a/onnxruntime/core/optimizer/common_subexpression_elimination.cc b/onnxruntime/core/optimizer/common_subexpression_elimination.cc
@@ -172,29 +172,18 @@ bool AreRangesEqual(const Range& lhs, const Range& rhs) {
 }
 
 // Check if two tensor attributes are equal scalar tensors, mainly to support ConstantOfShape Op.
-// Currently support float, float16 and int64 data types, and requires the data are raw data in TensorProto.
 bool AreScalarTensorAttributeEqual(const ONNX_NAMESPACE::TensorProto& lhs_t, const ONNX_NAMESPACE::TensorProto& rhs_t) {
   if (!(utils::HasDataType(lhs_t) && utils::HasDataType(rhs_t) && lhs_t.data_type() == rhs_t.data_type() &&
-        (lhs_t.data_type() == onnx::TensorProto_DataType_FLOAT ||
-         lhs_t.data_type() == onnx::TensorProto_DataType_FLOAT16 ||
-         lhs_t.data_type() == onnx::TensorProto_DataType_INT64) &&
-        lhs_t.dims_size() == 1 && rhs_t.dims_size() == 1 && lhs_t.dims()[0] == 1 && rhs_t.dims()[0] == 1 &&
-        utils::HasRawData(lhs_t) && utils::HasRawData(rhs_t))) {
+        lhs_t.data_type() != onnx::TensorProto_DataType_STRING &&
+        lhs_t.dims_size() == 1 && rhs_t.dims_size() == 1 && lhs_t.dims()[0] == 1 && rhs_t.dims()[0] == 1)) {
     return false;
   }
-  const void* lhs_value = lhs_t.raw_data().data();
-  const void* rhs_value = rhs_t.raw_data().data();
-  switch (lhs_t.data_type()) {
-    case onnx::TensorProto_DataType_FLOAT:
-      return *reinterpret_cast<const float*>(lhs_value) == *reinterpret_cast<const float*>(rhs_value);
-    case onnx::TensorProto_DataType_FLOAT16:
-      return *reinterpret_cast<const MLFloat16*>(lhs_value) == *reinterpret_cast<const MLFloat16*>(rhs_value);
-    case onnx::TensorProto_DataType_INT64:
-      return *reinterpret_cast<const int64_t*>(lhs_value) == *reinterpret_cast<const int64_t*>(rhs_value);
-    default:
-      break;
+  std::vector<uint8_t> unpacked_lhs_tensor, unpacked_rhs_tensor;
+  if (!utils::UnpackInitializerData(lhs_t, unpacked_lhs_tensor).IsOK() ||
+      !utils::UnpackInitializerData(rhs_t, unpacked_rhs_tensor).IsOK()) {
+    return false;
   }
-  return false;
+  return unpacked_lhs_tensor == unpacked_rhs_tensor;
 }
 
 bool AreEqual(const ONNX_NAMESPACE::AttributeProto& lhs, const ONNX_NAMESPACE::AttributeProto& rhs) {
@@ -235,26 +224,16 @@ bool AreEqual(const ONNX_NAMESPACE::AttributeProto& lhs, const ONNX_NAMESPACE::A
   return false;
 }
 
-// Support scalar float/int64/fp16 tensor attribute only for now, and requires data is raw data in TensorProto.
+// Support scalar tensor attribute only for now.
 std::size_t GetTensorAttributeHash(const ONNX_NAMESPACE::TensorProto& attr_t) {
   std::size_t hash = 0;
-  if (utils::HasDataType(attr_t) && attr_t.dims_size() == 1 && attr_t.dims()[0] == 1 && utils::HasRawData(attr_t)) {
+  if (utils::HasDataType(attr_t) && attr_t.dims_size() == 1 && attr_t.dims()[0] == 1) {
     int data_type = attr_t.data_type();
-    switch (data_type) {
-      case onnx::TensorProto_DataType_FLOAT:
-        UpdateHash(data_type, hash);
-        UpdateHash(*reinterpret_cast<const float*>(attr_t.raw_data().data()), hash);
-        break;
-      case onnx::TensorProto_DataType_FLOAT16:
-        UpdateHash(data_type, hash);
-        UpdateHash(static_cast<float>(*reinterpret_cast<const MLFloat16*>(attr_t.raw_data().data())), hash);
-        break;
-      case onnx::TensorProto_DataType_INT64:
-        UpdateHash(data_type, hash);
-        UpdateHash(*reinterpret_cast<const int64_t*>(attr_t.raw_data().data()), hash);
-        break;
-      default:
-        break;
+    ORT_ENFORCE(data_type != onnx::TensorProto_DataType_STRING, "Unexpected tensor string type");
+    std::vector<uint8_t> unpacked_tensor;
+    if (utils::UnpackInitializerData(attr_t, unpacked_tensor).IsOK()) {
+      UpdateHash(data_type, hash);
+      UpdateHashWithContainer(unpacked_tensor, hash);
     }
   }
   return hash;