Skip to content

Commit 3fc67d9

Browse files
authored
Fix integer overflows in pylibcudf from_column_view_of_arbitrary (#18758)
Although the number of _rows_ in a `column_view` cannot exceed the maximum value of `size_type`, the number of _bytes_ it occupies can. Compute byte sizes using wider integer types so that they no longer overflow. This fixes multi-GPU errors we are seeing in rapidsmpf when communicating buffers.

Authors:
- Lawrence Mitchell (https://github.com/wence-)
- Matthew Murray (https://github.com/Matt711)

Approvers:
- Matthew Murray (https://github.com/Matt711)
- Vyas Ramasubramani (https://github.com/vyasr)
- David Wendt (https://github.com/davidwendt)
- Nghia Truong (https://github.com/ttnghia)
- https://github.com/nvdbaranec

URL: #18758
1 parent a034efe commit 3fc67d9

File tree

7 files changed

+31
-21
lines changed

7 files changed

+31
-21
lines changed

cpp/include/cudf/strings/strings_column_view.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ class strings_column_view : private column_view {
4343
* @param strings_column The column view to wrap.
4444
*/
4545
strings_column_view(column_view strings_column);
46+
// So we can use this from cython.
47+
strings_column_view() = default;
4648
strings_column_view(strings_column_view&&) = default; ///< Move constructor
4749
strings_column_view(strings_column_view const&) = default; ///< Copy constructor
4850
~strings_column_view() override = default;

python/pylibcudf/pylibcudf/column.pyx

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ from pylibcudf.libcudf.interop cimport (
2626
to_arrow_device_raw,
2727
to_arrow_schema_raw,
2828
)
29-
from pylibcudf.libcudf.scalar.scalar cimport scalar, numeric_scalar
29+
from pylibcudf.libcudf.null_mask cimport bitmask_allocation_size_bytes
30+
from pylibcudf.libcudf.scalar.scalar cimport scalar
31+
from pylibcudf.libcudf.strings.strings_column_view cimport strings_column_view
3032
from pylibcudf.libcudf.types cimport size_type, size_of as cpp_size_of, bitmask_type
3133
from pylibcudf.libcudf.utilities.traits cimport is_fixed_width
3234
from pylibcudf.libcudf.copying cimport get_element
@@ -50,7 +52,6 @@ from ._interop_helpers cimport (
5052
_release_device_array,
5153
_metadata_to_libcudf,
5254
)
53-
from .null_mask cimport bitmask_allocation_size_bytes
5455
from .utils cimport _get_stream
5556

5657
from .gpumemoryview import _datatype_from_dtype_desc
@@ -76,21 +77,14 @@ cdef class OwnerWithCAI:
7677
# The default size of 0 will be applied for any type that stores data in the
7778
# children (such that the parent size is 0).
7879
size = 0
79-
cdef column_view offsets_column
80-
cdef unique_ptr[scalar] last_offset
8180
if cv.type().id() == type_id.EMPTY:
8281
size = cv.size()
8382
elif is_fixed_width(cv.type()):
84-
# Cast to Python integers before multiplyling to avoid overflow.
83+
# Cast to Python integers before multiplying to avoid overflow.
8584
size = int(cv.size()) * int(cpp_size_of(cv.type()))
8685
elif cv.type().id() == type_id.STRING:
87-
# A strings column with no children is created for empty/all null, in which
88-
# case the size remains 0. Otherwise, the size of the character array stored
89-
# in the parent is the last offset in the offsets child.
90-
if cv.num_children():
91-
offsets_column = cv.child(0)
92-
last_offset = get_element(offsets_column, offsets_column.size() - 1)
93-
size = (<numeric_scalar[size_type] *> last_offset.get()).value()
86+
# TODO: stream-ordered
87+
size = strings_column_view(cv).chars_size(_get_stream().view())
9488

9589
obj.cai = {
9690
"shape": (size,),

python/pylibcudf/pylibcudf/gpumemoryview.pyx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
22

3+
from libc.stddef cimport size_t
34
import functools
45
import operator
56

67
from .types cimport DataType, size_of, type_id
78

8-
from pylibcudf.libcudf.types cimport size_type
9-
109

1110
__all__ = ["gpumemoryview"]
1211

@@ -62,7 +61,7 @@ cdef class gpumemoryview:
6261
self.ptr = cai["data"][0]
6362

6463
# Compute the buffer size.
65-
cdef size_type itemsize = size_of(
64+
cdef size_t itemsize = size_of(
6665
_datatype_from_dtype_desc(
6766
cai["typestr"][1:] # ignore the byteorder (the first char).
6867
)
python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION.
2+
3+
from libc.stdint cimport int64_t
4+
from pylibcudf.exception_handler cimport libcudf_exception_handler
5+
from pylibcudf.libcudf.column.column_view cimport column_view
6+
7+
from rmm.librmm.cuda_stream_view cimport cuda_stream_view
8+
9+
cdef extern from "cudf/strings/strings_column_view.hpp" namespace "cudf" nogil:
10+
cdef cppclass strings_column_view:
11+
strings_column_view(column_view) except +libcudf_exception_handler
12+
int64_t chars_size(cuda_stream_view) except +libcudf_exception_handler

python/pylibcudf/pylibcudf/libcudf/types.pxd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
from libc.stddef cimport size_t
23
from libc.stdint cimport int32_t, uint32_t
34
from libcpp cimport bool
45
from pylibcudf.exception_handler cimport libcudf_exception_handler
@@ -100,4 +101,4 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
100101
MIDPOINT
101102
NEAREST
102103

103-
cdef size_type size_of(data_type t) except +libcudf_exception_handler
104+
cdef size_t size_of(data_type t) except +libcudf_exception_handler

python/pylibcudf/pylibcudf/types.pxd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
22

3+
from libc.stddef cimport size_t
34
from libc.stdint cimport int32_t
45
from libcpp cimport bool as cbool
56
from pylibcudf.libcudf.types cimport (
@@ -27,4 +28,4 @@ cdef class DataType:
2728
@staticmethod
2829
cdef DataType from_libcudf(data_type dt)
2930

30-
cpdef size_type size_of(DataType t)
31+
cpdef size_t size_of(DataType t)

python/pylibcudf/pylibcudf/types.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
22

3+
from libc.stddef cimport size_t
34
from libc.stdint cimport int32_t
45
from pylibcudf.libcudf.types cimport (
56
data_type,
@@ -92,7 +93,7 @@ cdef class DataType:
9293
ret.c_obj = dt
9394
return ret
9495

95-
cpdef size_type size_of(DataType t):
96+
cpdef size_t size_of(DataType t):
9697
"""Returns the size in bytes of elements of the specified data_type.
9798
9899
Only fixed-width types are supported.

0 commit comments

Comments
 (0)