Skip to content

Commit 4f2c8bf

Browse files
committed
Expose strings_column_view in Cython and use it for char buffer size
Rather than reinventing the wheel, use the libcudf functionality.
1 parent b0c03b3 commit 4f2c8bf

File tree

3 files changed

+23
-18
lines changed

3 files changed

+23
-18
lines changed

cpp/include/cudf/strings/strings_column_view.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -43,6 +43,8 @@ class strings_column_view : private column_view {
4343
* @param strings_column The column view to wrap.
4444
*/
4545
strings_column_view(column_view strings_column);
46+
// Default constructor so this class can be used from Cython, which default-constructs C++ temporaries.
47+
strings_column_view() = default;
4648
strings_column_view(strings_column_view&&) = default; ///< Move constructor
4749
strings_column_view(strings_column_view const&) = default; ///< Copy constructor
4850
~strings_column_view() override = default;

python/pylibcudf/pylibcudf/column.pyx

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ from cpython.pycapsule cimport (
88
)
99

1010
from libc.stddef cimport size_t
11-
from libc.stdint cimport uintptr_t, int32_t, int64_t
11+
from libc.stdint cimport uintptr_t
1212

1313
from libcpp.limits cimport numeric_limits
1414
from libcpp.memory cimport make_unique, unique_ptr
@@ -27,7 +27,8 @@ from pylibcudf.libcudf.interop cimport (
2727
to_arrow_schema_raw,
2828
)
2929
from pylibcudf.libcudf.null_mask cimport bitmask_allocation_size_bytes
30-
from pylibcudf.libcudf.scalar.scalar cimport scalar, numeric_scalar
30+
from pylibcudf.libcudf.scalar.scalar cimport scalar
31+
from pylibcudf.libcudf.strings.strings_column_view cimport strings_column_view
3132
from pylibcudf.libcudf.types cimport size_type, size_of as cpp_size_of, bitmask_type
3233
from pylibcudf.libcudf.utilities.traits cimport is_fixed_width
3334
from pylibcudf.libcudf.copying cimport get_element
@@ -88,26 +89,13 @@ cdef class OwnerWithCAI:
8889
obj = OwnerWithCAI()
8990
obj.owner = owner
9091
cdef size_t size
91-
cdef column_view offsets_column
92-
cdef unique_ptr[scalar] last_offset
9392
if cv.type().id() == type_id.EMPTY:
9493
size = cv.size()
9594
elif is_fixed_width(cv.type()):
9695
size = cv.size() * cpp_size_of(cv.type())
9796
elif cv.type().id() == type_id.STRING:
98-
# The size of the character array in the parent is the offsets size
99-
num_children = cv.num_children()
100-
size = 0
101-
# A strings column with no children is created for empty/all null
102-
if num_children:
103-
offsets_column = cv.child(0)
104-
last_offset = get_element(offsets_column, offsets_column.size() - 1)
105-
if offsets_column.type().id() == type_id.INT32:
106-
size = (<numeric_scalar[int32_t] *> last_offset.get()).value()
107-
elif offsets_column.type().id() == type_id.INT64:
108-
size = (<numeric_scalar[int64_t] *>last_offset.get()).value()
109-
else:
110-
raise RuntimeError("Invalid strings column offset dtype")
97+
# TODO: make this stream-ordered; chars_size currently runs on the stream from _get_stream()
98+
size = strings_column_view(cv).chars_size(_get_stream().view())
11199
else:
112100
# All other types store data in the children, so the parent size is 0
113101
size = 0
python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION.
2+
3+
from libc.stdint cimport int64_t
4+
from libcpp cimport bool
5+
from libcpp.vector cimport vector
6+
from pylibcudf.exception_handler cimport libcudf_exception_handler
7+
from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type
8+
from pylibcudf.libcudf.column.column_view cimport column_view
9+
10+
from rmm.librmm.cuda_stream_view cimport cuda_stream_view
11+
12+
cdef extern from "cudf/strings/strings_column_view.hpp" namespace "cudf" nogil:
13+
cdef cppclass strings_column_view:
14+
strings_column_view(column_view) except +libcudf_exception_handler
15+
int64_t chars_size(cuda_stream_view) except +libcudf_exception_handler

0 commit comments

Comments
 (0)