Skip to content

Commit c9b4be5

Browse files
Fix out-of-bounds memory access in SetKernel for 0-size tensor (#78486)
* Fix out-of-bounds memory access in SetKernel with 0-size source tensor When calling Tensor.set_(source, shape, stride, offset) with a 0-size source tensor and non-zero target shape, the original code had a missing branch in the conditional logic: when source.numel()==0 and x.numel()!=0, no branch was executed, leaving `out` with its original data holder but with the user-specified meta (shape/stride). This caused ContiguousKernel to read beyond allocated memory when converting the strided tensor to contiguous. The fix forces the output tensor to inherit the source's 0-size dims/strides when source has no elements, preventing out-of-bounds access. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(phi): fix set_kernel access memory error when tensor is empty * test(inplace): fix assert of 0-size tensor set_ behaviour * fix(phi): fix CPU * test(inplace): add 0-dim tensor set to non-0-size tensor tests * fix(set): handle zero-element output in inplace operation --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7c6077b commit c9b4be5

File tree

2 files changed

+173
-6
lines changed

2 files changed

+173
-6
lines changed

paddle/phi/kernels/set_kernel.cc

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,26 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414
#include "paddle/phi/kernels/set_kernel.h"
15+
#include <cstring>
16+
#include "paddle/phi/common/memory_utils.h"
1517
#include "paddle/phi/core/kernel_registry.h"
1618
#include "paddle/phi/kernels/full_kernel.h"
1719
namespace phi {
1820

21+
// Compute the minimum number of elements required in storage to hold
22+
// a strided view described by dims, stride and offset.
23+
static int64_t ComputeRequiredStorageSize(const std::vector<int64_t>& dims,
24+
const std::vector<int64_t>& stride,
25+
int64_t offset) {
26+
int64_t required = offset;
27+
for (size_t i = 0; i < dims.size(); ++i) {
28+
if (dims[i] > 0) {
29+
required += (dims[i] - 1) * stride[i];
30+
}
31+
}
32+
return required + 1; // +1 for the last element itself
33+
}
34+
1935
template <typename T, typename Context>
2036
void SetKernel(const Context& dev_ctx,
2137
const DenseTensor& x,
@@ -29,11 +45,58 @@ void SetKernel(const Context& dev_ctx,
2945
meta.strides = DDim(stride.data(), static_cast<int>(stride.size()));
3046
meta.offset = offset;
3147
if (x.numel() == 0 || source.numel() == 0) {
32-
if (source.numel() != 0) {
48+
int64_t out_numel = 1;
49+
for (auto d : dims) {
50+
out_numel *= d;
51+
}
52+
if (source.numel() == 0 && x.numel() != 0) {
53+
// Source is empty but x has storage. Reuse x's storage and apply
54+
// the user-specified meta, matching PyTorch behavior.
55+
if (out_numel == 0) {
56+
// Output has 0 elements — no storage needed, just set meta.
57+
out->set_meta(meta);
58+
out->ShareInplaceVersionCounterWith(x);
59+
return;
60+
}
61+
// If the strided view requires more storage than x provides,
62+
// allocate a larger zero-filled buffer and copy x's data into it
63+
// to avoid out-of-bounds reads on elements beyond x's allocation.
64+
int64_t required_size = ComputeRequiredStorageSize(dims, stride, offset);
65+
if (required_size > x.numel()) {
66+
DenseTensor tmp;
67+
std::vector<int64_t> alloc_shape = {required_size};
68+
Full<T, Context>(dev_ctx, alloc_shape, 0, &tmp);
69+
if (dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU) {
70+
std::memcpy(tmp.data<T>(), x.data<T>(), x.numel() * sizeof(T));
71+
} else {
72+
memory_utils::Copy(dev_ctx.GetPlace(),
73+
tmp.data<T>(),
74+
dev_ctx.GetPlace(),
75+
x.data<T>(),
76+
x.numel() * sizeof(T),
77+
nullptr);
78+
}
79+
out->clear();
80+
*out = DenseTensor{tmp.Holder(), meta};
81+
} else {
82+
out->set_meta(meta);
83+
}
84+
} else if (source.numel() == 0 && x.numel() == 0 && out_numel != 0) {
85+
// Both x and source are 0-size but user wants non-zero shape.
86+
// Allocate zero-filled storage to avoid null pointer access.
87+
int64_t required_size = ComputeRequiredStorageSize(dims, stride, offset);
88+
DenseTensor tmp;
89+
std::vector<int64_t> alloc_shape = {required_size};
90+
Full<T, Context>(dev_ctx, alloc_shape, 0, &tmp);
91+
out->clear();
92+
*out = DenseTensor{tmp.Holder(), meta};
93+
} else if (source.numel() != 0) {
94+
out->clear();
95+
*out = DenseTensor{source.Holder(), meta};
96+
} else {
97+
// Both 0-size, output also 0-size
3398
out->clear();
3499
*out = DenseTensor{source.Holder(), meta};
35-
} else if (x.numel() == 0) {
36-
Full<T, Context>(dev_ctx, out->dims(), 0, out);
37100
}
38101
out->ShareInplaceVersionCounterWith(x);
39102
return;

test/legacy_test/test_inplace.py

Lines changed: 107 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2603,11 +2603,115 @@ class TestSet_API_ZeroSize(unittest.TestCase):
26032603
def setUp(self):
26042604
self.places = get_places()
26052605

2606-
def test_set_api(self):
2606+
def test_zero_size_source_with_nonzero_shape(self):
    """A 0-size source combined with explicit non-zero dims/stride must
    yield an output with the requested shape (matching PyTorch), and the
    backing storage must be grown so no out-of-bounds read occurs."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            empty_src = paddle.randn([0, 3])
            target = paddle.randn([20])
            result = target.set_(empty_src, [20], [2])
            self.assertEqual(list(result.shape), [20])
            # Materializing the strided view must not touch memory
            # outside the allocation.
            dense = result.contiguous()
            self.assertEqual(list(dense.shape), [20])
2620+
def test_zero_size_source_default_args(self):
    """set_ with a 0-size source and no explicit shape/stride: the
    output adopts the source's 0-size shape and stays the same object
    as x (in-place semantics)."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            source = paddle.randn([0, 5])
            x = paddle.randn([10])
            out = x.set_(source)
            self.assertEqual(out.numel().item(), 0)
            self.assertEqual(list(out.shape), [0, 5])
            # assertIs checks the same identity as
            # assertTrue(id(x) == id(out)) but reports a clearer
            # failure message.
            self.assertIs(out, x)
2631+
def test_zero_size_x_nonzero_source(self):
    """A 0-size target set_ from a non-empty source behaves like a
    regular set_: the shape comes from the source and the storage is
    shared between the two tensors."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            src = paddle.to_tensor([1.0, 2.0, 3.0])
            empty_target = paddle.randn([0])
            result = empty_target.set_(src)
            self.assertEqual(list(result.shape), [3])
            # The in-place target must now alias the source buffer.
            self.assertTrue(empty_target._is_shared_buffer_with(src))
def test_both_zero_size(self):
    """set_ where both x and source are 0-size: the output is empty and
    remains the same Python object as x."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            source = paddle.randn([0])
            x = paddle.randn([0])
            out = x.set_(source)
            self.assertEqual(out.numel().item(), 0)
            # assertIs: identity check with a better failure message
            # than assertTrue(id(x) == id(out)).
            self.assertIs(out, x)
def test_both_zero_size_with_nonzero_shape(self):
    """Both x and source are 0-size but a non-zero dims/stride is
    requested. Covers the kernel branch that allocates fresh
    zero-filled storage when both tensors are empty."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            source = paddle.randn([0])
            x = paddle.randn([0])
            out = x.set_(source, [4], [1])
            self.assertEqual(list(out.shape), [4])
            # assertIs replaces assertTrue(id(x) == id(out)): same
            # identity check, clearer diagnostics.
            self.assertIs(out, x)
            # The freshly allocated storage must be readable and
            # zero-filled.
            c = out.contiguous()
            self.assertEqual(list(c.shape), [4])
            np.testing.assert_array_equal(
                c.numpy(), np.zeros([4], dtype='float32')
            )
def test_both_zero_size_with_nonzero_shape_and_offset(self):
    """Both x and source are 0-size and the user requests a non-zero
    shape with a non-zero offset: the allocation must be large enough
    to cover the offset without out-of-bounds access."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            source = paddle.randn([0])
            x = paddle.randn([0])
            # The offset must be a multiple of the element size
            # (4 bytes for float32) to avoid misaligned GPU memory
            # access.
            out = x.set_(source, [3], [2], 4)
            self.assertEqual(list(out.shape), [3])
            # assertIs replaces assertTrue(id(x) == id(out)): same
            # identity check, clearer diagnostics.
            self.assertIs(out, x)
            c = out.contiguous()
            self.assertEqual(list(c.shape), [3])
            np.testing.assert_array_equal(
                c.numpy(), np.zeros([3], dtype='float32')
            )
def test_both_zero_size_with_nonzero_2d_shape(self):
    """Both tensors are 0-size and the user requests a 2-D non-zero
    shape: verifies a multi-dimensional strided view gets correctly
    sized zero-filled storage."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            source = paddle.randn([0, 0])
            x = paddle.randn([0])
            out = x.set_(source, [2, 3], [3, 1])
            self.assertEqual(list(out.shape), [2, 3])
            # assertIs replaces assertTrue(id(x) == id(out)): same
            # identity check, clearer diagnostics.
            self.assertIs(out, x)
            c = out.contiguous()
            self.assertEqual(list(c.shape), [2, 3])
            np.testing.assert_array_equal(
                c.numpy(), np.zeros([2, 3], dtype='float32')
            )
def test_zero_size_source_no_crash_on_contiguous(self):
    """contiguous() on a tensor produced by set_ with a 0-size source
    and a user-specified shape must succeed and keep the right shape."""
    for place in self.places:
        with paddle.base.dygraph.guard(place):
            empty_src = paddle.randn([0, 3])
            target = paddle.randn([20])
            strided = target.set_(empty_src, [20], [2])
            # Converting the strided view to a dense tensor must not
            # crash or read beyond the allocation.
            dense = strided.contiguous()
            self.assertEqual(list(dense.shape), [20])

26122716

26132717
if __name__ == '__main__':

0 commit comments

Comments
 (0)