
Commit 2f4be9a

Add check that tensor sizes match in DataTransferManager::CopyTensors (#27008)
### Description

Add a check that tensor sizes match in DataTransferManager::CopyTensors before calling the IDataTransfer implementation, so the check is done in one place. DataTransferManager::CopyTensor[Async] already checks that the sizes match, so this makes a batched copy consistent with a single copy.

The check is not required for DataTransferManager::CopySparseTensors. The default implementation of IDataTransfer::CopySparseTensors is not overridden by any EP, so all sparse tensor copies (single or batched) end up going via SparseTensor::Copy, which has size checks.

### Motivation and Context

TRT RTX had a bug and was returning an output value of an incorrect size. When pre-allocated outputs on a different device were provided, we hit DataTransferManager::CopyTensors, which had no check that the sizes matched, leading to a heap checker violation.
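In outline, CopyTensors now validates every src/dst pair up front, after the single IDataTransfer lookup and before any copy is dispatched. The following is a minimal sketch condensed from the diff below, not the full implementation: the function name is hypothetical, the device lookup and batched dispatch are elided, and the pair accessors and ORT_MAKE_STATUS usage mirror the committed hunk.

```cpp
// Sketch only: condensed from the CopyTensors hunk in the diff below.
#include <vector>

#include "core/common/common.h"                    // ORT_MAKE_STATUS
#include "core/framework/data_transfer_manager.h"  // IDataTransfer, Tensor, Status

namespace onnxruntime {

common::Status CopyTensorsSizeCheckSketch(const std::vector<IDataTransfer::SrcDstPair>& src_dst_pairs) {
  // (Single IDataTransfer lookup for the shared src/dst devices elided.)

  // New in this commit: reject any pair whose element counts differ before doing any copy,
  // so a mis-sized pre-allocated output fails with a Status instead of overrunning the buffer.
  for (const auto& pair : src_dst_pairs) {
    const auto& src_shape = pair.src.get().Shape();
    const auto& dst_shape = pair.dst.get().Shape();
    if (src_shape.Size() != dst_shape.Size()) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Tensor size mismatch. src:", src_shape, " dst:", dst_shape);
    }
  }

  // (Dispatch of the now-validated copies, batched where possible, elided; see the full hunk below.)
  return common::Status::OK();
}

}  // namespace onnxruntime
```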
1 parent a3e477e commit 2f4be9a

File tree

2 files changed

+66
-27
lines changed

2 files changed

+66
-27
lines changed

onnxruntime/core/framework/data_transfer_manager.cc

Lines changed: 22 additions & 27 deletions
@@ -54,12 +54,9 @@ Status DataTransferManager::CopyTensor(const Tensor& src, Tensor& dst) const {
     return data_transfer->CopyTensor(src, dst);
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME,
-                         FAIL,
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                          "There's no data transfer registered for copying tensors from ",
-                         src.Location().device.ToString(),
-                         " to ",
-                         dst.Location().device.ToString());
+                         src.Location().device.ToString(), " to ", dst.Location().device.ToString());
 }
 
 Status DataTransferManager::CopyTensorAsync(const Tensor& src, Tensor& dst, Stream& stream) const {
@@ -75,12 +72,9 @@ Status DataTransferManager::CopyTensorAsync(const Tensor& src, Tensor& dst, Stre
     return data_transfer->CopyTensorAsync(src, dst, stream);
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME,
-                         FAIL,
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                          "There's no data transfer registered for copying tensors from ",
-                         src.Location().device.ToString(),
-                         " to ",
-                         dst.Location().device.ToString());
+                         src.Location().device.ToString(), " to ", dst.Location().device.ToString());
 }
 
 #if !defined(DISABLE_SPARSE_TENSORS)
@@ -97,12 +91,9 @@ Status DataTransferManager::CopySparseTensor(const SparseTensor& src, SparseTens
     return src.Copy(*data_transfer, dst);
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME,
-                         FAIL,
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                          "There's no data transfer registered for copying tensors from ",
-                         src.Location().device.ToString(),
-                         " to ",
-                         dst.Location().device.ToString());
+                         src.Location().device.ToString(), " to ", dst.Location().device.ToString());
 }
 #endif
 
@@ -130,12 +121,17 @@ common::Status DataTransferManager::CopyTensors(const std::vector<IDataTransfer:
   }
 
   if (first_dt == nullptr) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME,
-                           FAIL,
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                            "There's no data transfer registered for copying tensors from ",
-                           src_device.ToString(),
-                           " to ",
-                           dst_device.ToString());
+                           src_device.ToString(), " to ", dst_device.ToString());
+  }
+
+  for (const auto& pair : src_dst_pairs) {
+    const auto& src_shape = pair.src.get().Shape();
+    const auto& dst_shape = pair.dst.get().Shape();
+    if (src_shape.Size() != dst_shape.Size()) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Tensor size mismatch. src:", src_shape, " dst:", dst_shape);
+    }
   }
 
   // all copies are between the same devices so we can do them all at once
@@ -148,11 +144,13 @@ common::Status DataTransferManager::CopyTensors(const std::vector<IDataTransfer:
   // batch as much as possible.
 
   // copy the first one as we already did the IDataTransfer lookup
-  ORT_RETURN_IF_ERROR(first_pair.src_stream ? first_dt->CopyTensorAsync(first_pair.src.get(), first_pair.dst.get(), *(first_pair.src_stream))
+  ORT_RETURN_IF_ERROR(first_pair.src_stream ? first_dt->CopyTensorAsync(first_pair.src.get(), first_pair.dst.get(),
+                                                                        *(first_pair.src_stream))
                                             : first_dt->CopyTensor(first_pair.src.get(), first_pair.dst.get()));
 
   for (auto cur_pair = src_dst_pairs.cbegin() + 1, end_pair = src_dst_pairs.cend(); cur_pair != end_pair; ++cur_pair) {
-    ORT_RETURN_IF_ERROR(!cur_pair->src_stream ? CopyTensor(cur_pair->src, cur_pair->dst) : CopyTensorAsync(cur_pair->src, cur_pair->dst, *(cur_pair->src_stream)));
+    ORT_RETURN_IF_ERROR(!cur_pair->src_stream ? CopyTensor(cur_pair->src, cur_pair->dst)
+                                              : CopyTensorAsync(cur_pair->src, cur_pair->dst, *(cur_pair->src_stream)));
   }
 
   return Status::OK();
@@ -183,12 +181,9 @@ common::Status DataTransferManager::CopySparseTensors(const std::vector<IDataTra
   }
 
   if (first_dt == nullptr) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME,
-                           FAIL,
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                            "There's no data transfer registered for copying tensors from ",
-                           src_device.ToString(),
-                           " to ",
-                           dst_device.ToString());
+                           src_device.ToString(), " to ", dst_device.ToString());
   }
 
   // all copies are between the same devices so we can do them all at once
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include "core/common/inlined_containers.h"
+#include "core/framework/data_transfer_manager.h"
+#include "core/framework/ort_value.h"
+#include "test/unittest_util/framework_test_utils.h"
+#include "test/util/include/asserts.h"
+
+namespace onnxruntime {
+namespace test {
+
+// DataTransferManager::CopyTensors should validate sizes match before calling the IDataTransfer implementation
+TEST(DataTransferManagerTest, BatchedTensorCopyBadSize) {
+  auto allocator = TestCPUExecutionProvider()->CreatePreferredAllocators()[0];
+  std::vector<OrtValue> src_tensors{2};
+  InlinedVector<int64_t> shape_a{4}, shape_b{5}, shape_c{6};
+  std::vector<OrtValue> dst_tensors{2};
+
+  // first pair is matched
+  AllocateMLValue<float>(allocator, shape_a, &src_tensors[0]);
+  AllocateMLValue<float>(allocator, shape_a, &dst_tensors[0]);
+
+  // second pair has size mismatch
+  AllocateMLValue<float>(allocator, shape_c, &src_tensors[1]);
+  AllocateMLValue<float>(allocator, shape_b, &dst_tensors[1]);
+
+  DataTransferManager dtm;
+  ASSERT_STATUS_OK(dtm.RegisterDataTransfer(std::make_unique<CPUDataTransfer>()));
+
+  std::vector<IDataTransfer::SrcDstPair> src_dst_pairs;
+  src_dst_pairs.push_back({src_tensors[0].Get<Tensor>(), *dst_tensors[0].GetMutable<Tensor>(), nullptr});
+  src_dst_pairs.push_back({src_tensors[1].Get<Tensor>(), *dst_tensors[1].GetMutable<Tensor>(), nullptr});
+  auto status = dtm.CopyTensors(src_dst_pairs);
+
+  ASSERT_STATUS_NOT_OK(status);
+  ASSERT_THAT(status.ErrorMessage(), testing::HasSubstr("Tensor size mismatch"));
+}
+
+}  // namespace test
+}  // namespace onnxruntime
