From deac61bb44e20b88f8487835eb80d4dae07fdfa7 Mon Sep 17 00:00:00 2001 From: Selena Yang <179177246+selenayang888@users.noreply.github.com> Date: Wed, 3 Jun 2026 17:58:05 -0700 Subject: [PATCH 1/4] Fix Type-confusion: raw-pointer bind_input causing OOB write --- onnxruntime/python/onnxruntime_pybind_iobinding.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/onnxruntime/python/onnxruntime_pybind_iobinding.cc b/onnxruntime/python/onnxruntime_pybind_iobinding.cc index b82dd1474bdf6..58ab2b2e5a800 100644 --- a/onnxruntime/python/onnxruntime_pybind_iobinding.cc +++ b/onnxruntime/python/onnxruntime_pybind_iobinding.cc @@ -92,6 +92,12 @@ void addIoBindingMethods(pybind11::module& m) { }) // This binds input as a Tensor that wraps memory pointer along with the OrtMemoryInfo .def("bind_input", [](SessionIOBinding* io_binding, const std::string& name, const OrtDevice& device, int32_t element_type, const std::vector<int64_t>& shape, int64_t data_ptr) -> void { + // String tensors require live std::string objects in the backing buffer; the raw-pointer + // overload only wraps caller-provided bytes, so binding a string tensor here would lead + // to reading/writing through uninitialized std::string storage. Reject it explicitly. + if (element_type == onnx::TensorProto::STRING) { + throw std::runtime_error("Only binding non-string Tensors is currently supported"); + } auto ml_type = OnnxTypeToOnnxRuntimeTensorType(element_type); OrtValue ml_value; OrtMemoryInfo info(GetDeviceName(device), OrtDeviceAllocator, device); @@ -113,6 +119,12 @@ void addIoBindingMethods(pybind11::module& m) { OrtMemoryInfo info(GetDeviceName(device), OrtDeviceAllocator, device); auto ml_type = NumpyTypeToOnnxRuntimeTensorType(type_num); + // See comment in the int32_t element_type overload above: string tensors are not safe + // to bind via a raw, non-owning pointer because no std::string objects are constructed + // in the caller buffer. 
+ if (ml_type == DataTypeImpl::GetType<std::string>()) { + throw std::runtime_error("Only binding non-string Tensors is currently supported"); + } OrtValue ml_value; Tensor::InitOrtValue(ml_type, gsl::make_span(shape), reinterpret_cast<void*>(data_ptr), info, ml_value); From a7f62e00528beecd479be747ce94b3493c34ec50 Mon Sep 17 00:00:00 2001 From: Selena Yang <179177246+selenayang888@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:48:28 -0700 Subject: [PATCH 2/4] Added test for the changes --- .../onnxruntime_test_python_iobinding.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py index 0e0c62bba5d50..240cb040573b1 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py +++ b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py @@ -441,6 +441,30 @@ def test_bind_input_and_bind_output_with_ortvalues(self): # Inspect contents of output_ortvalue and make sure that it has the right contents self.assertTrue(np.array_equal(self._create_expected_output_alternate(), output_ortvalue.numpy())) + def test_bind_input_rejects_string_tensor(self): + # Binding a string tensor via a raw, non-owning pointer is unsafe: the backing buffer + # has no live std::string objects, which previously caused out-of-bounds writes when + # the tensor was later read or destroyed. Both overloads of bind_input (ONNX int + # element_type and numpy dtype) must reject string tensors explicitly. + session = onnxrt.InferenceSession(get_name("mul_1.onnx"), providers=onnxrt.get_available_providers()) + io_binding = session.io_binding() + + # Use a real allocation just to have a valid pointer; the type check happens before + # the pointer is dereferenced. + scratch = np.zeros(4, dtype=np.uint8) + scratch_ptr = scratch.ctypes.data + + # Overload 1: int32 ONNX element type. 
+ with self.assertRaisesRegex(RuntimeError, "Only binding non-string Tensors"): + io_binding.bind_input("X", "cpu", 0, int(TensorProto.STRING), [1], scratch_ptr) + + # Overload 2: numpy dtype. NPY_UNICODE, NPY_STRING and NPY_OBJECT all map to + # std::string in NumpyTypeToOnnxRuntimeTensorType, so each of them must be rejected. + for dtype in (np.dtype("U1"), np.dtype("S1"), np.dtype(object)): + with self.subTest(dtype=dtype): + with self.assertRaisesRegex(RuntimeError, "Only binding non-string Tensors"): + io_binding.bind_input("X", "cpu", 0, dtype, [1], scratch_ptr) + if __name__ == "__main__": unittest.main() From be6880b15315c588b41ebb10cddd4bf8380fb31f Mon Sep 17 00:00:00 2001 From: Selena Yang <179177246+selenayang888@users.noreply.github.com> Date: Thu, 4 Jun 2026 16:36:57 -0700 Subject: [PATCH 3/4] Comparing the ONNX type enum constant instead of relying on pointer identity Co-authored-by: Copilot --- onnxruntime/python/onnxruntime_pybind_iobinding.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/onnxruntime_pybind_iobinding.cc b/onnxruntime/python/onnxruntime_pybind_iobinding.cc index 58ab2b2e5a800..d960444f240a4 100644 --- a/onnxruntime/python/onnxruntime_pybind_iobinding.cc +++ b/onnxruntime/python/onnxruntime_pybind_iobinding.cc @@ -121,8 +121,11 @@ void addIoBindingMethods(pybind11::module& m) { auto ml_type = NumpyTypeToOnnxRuntimeTensorType(type_num); // See comment in the int32_t element_type overload above: string tensors are not safe // to bind via a raw, non-owning pointer because no std::string objects are constructed - // in the caller buffer. - if (ml_type == DataTypeImpl::GetType<std::string>()) { + // in the caller buffer. Compare against the ONNX type enum rather than the singleton + // MLDataType pointer so the check stays correct even if the type registry returns a + // different (but equivalent) instance. 
+ const auto* primitive_type = ml_type->AsPrimitiveDataType(); + if (primitive_type != nullptr && primitive_type->GetDataType() == onnx::TensorProto::STRING) { throw std::runtime_error("Only binding non-string Tensors is currently supported"); } OrtValue ml_value; From 5eabd13533bbd4222041463b3b3c467daf7c4444 Mon Sep 17 00:00:00 2001 From: Selena Yang <179177246+selenayang888@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:24:51 -0700 Subject: [PATCH 4/4] Fixed nested 'with' statements. --- .../test/python/onnxruntime_test_python_iobinding.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py index 240cb040573b1..86b5473ecb4b8 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py +++ b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py @@ -461,9 +461,11 @@ def test_bind_input_rejects_string_tensor(self): # Overload 2: numpy dtype. NPY_UNICODE, NPY_STRING and NPY_OBJECT all map to # std::string in NumpyTypeToOnnxRuntimeTensorType, so each of them must be rejected. for dtype in (np.dtype("U1"), np.dtype("S1"), np.dtype(object)): - with self.subTest(dtype=dtype): - with self.assertRaisesRegex(RuntimeError, "Only binding non-string Tensors"): - io_binding.bind_input("X", "cpu", 0, dtype, [1], scratch_ptr) + with ( + self.subTest(dtype=dtype), + self.assertRaisesRegex(RuntimeError, "Only binding non-string Tensors"), + ): + io_binding.bind_input("X", "cpu", 0, dtype, [1], scratch_ptr) if __name__ == "__main__":