Skip to content

Commit 33e3681

Browse files
0ax1claude
andcommitted
clean: simplify Arrow device export internals
Cleanups from a review of the Arrow device export path, no behavior change: - Inline the single-caller list_layout_parts 5-tuple helper and the export_list_layout_with_child indirection into export_list and export_list_layout. ListChildExport itself stays: the take-based host list-view rebuild wraps children in transient Dict encodings that the pre-computed schema does not include, so the rebuilt-child arm must keep canonicalizing; its doc now spells that out. - Remove the dead end < offset wrap check in arrow_binary.cu: both addends are u32 widened to u64, so the sum cannot wrap. - Replace the panicking as_ptype on the dyn-dispatch cast target with PType::try_from, matching the source-side check three lines up. - Correct the export_binary rationale: cuDF's Arrow Device import rejects both Binary and BinaryView today (arrow_to_cudf_type maps neither), matching the e2e harness comment. - Fix the misplaced doc comment on export_list_view, and document the validity buffer padding contract on export_arrow_validity_buffer (every path is backed by a 4-byte-padded, tail-zeroed allocation via the device copy layer). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent a289c23 commit 33e3681

3 files changed

Lines changed: 32 additions & 42 deletions

File tree

vortex-cuda/kernels/src/arrow_binary.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ __device__ void init_scan_device(const BinaryView *const __restrict views,
6363
const BinaryViewRef *const view_ref = reinterpret_cast<const BinaryViewRef *>(&view);
6464
const uint64_t buffer_index = static_cast<uint64_t>(view_ref->buffer_index);
6565
const uint64_t offset = static_cast<uint64_t>(view_ref->offset);
66+
// Both addends are u32 widened to u64, so the end position cannot wrap.
6667
const uint64_t end = offset + static_cast<uint64_t>(size);
67-
if (buffer_index >= data_buffer_count || end < offset || end > data_buffer_lens[buffer_index]) {
68+
if (buffer_index >= data_buffer_count || end > data_buffer_lens[buffer_index]) {
6869
scan[idx] = 0;
6970
atomicMax(status, 1u);
7071
continue;

vortex-cuda/src/arrow/canonical.rs

Lines changed: 27 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -448,9 +448,11 @@ where
448448

449449
/// Export Vortex binary views as standard Arrow `Binary`.
450450
///
451-
/// cuDF imports Arrow `Binary` through the Arrow Device path, but does not currently accept
452-
/// Arrow `BinaryView`. This path keeps conversion on the CUDA stream by building `i32` offsets
453-
/// from view sizes and gathering inline/out-of-line view bytes into one contiguous values buffer.
451+
/// cuDF's Arrow Device import currently rejects both Arrow `Binary` and Arrow `BinaryView`
452+
/// (unlike `Utf8View`, which it accepts as strings), so standard `Binary` is exported as the
453+
/// layout other Arrow Device consumers accept most widely. This path keeps conversion on the
454+
/// CUDA stream by building `i32` offsets from view sizes and gathering inline/out-of-line view
455+
/// bytes into one contiguous values buffer.
454456
async fn export_binary(
455457
varbinview: VarBinViewArray,
456458
ctx: &mut CudaExecutionCtx,
@@ -679,6 +681,11 @@ fn gather_binary_values(
679681
/// Export Vortex validity as an Arrow validity byte buffer on the CUDA device.
680682
///
681683
/// Returns `None` for the buffer when Arrow can omit validity because all rows are valid.
684+
///
685+
/// Every returned buffer is backed by an allocation padded to a 4-byte multiple with zeroed
686+
/// padding so cuDF's word-sized mask reads stay in bounds: the fast path through the device
687+
/// copy's tail zeroing, the other paths through their own padded allocations. Bits at positions
688+
/// `>= len + arrow_offset` within the final data byte are unspecified, as Arrow permits.
682689
pub(super) async fn export_arrow_validity_buffer(
683690
validity: Validity,
684691
len: usize,
@@ -788,7 +795,7 @@ pub(super) fn repack_arrow_validity_buffer(
788795
Ok(BufferHandle::new_device(Arc::new(output_device)).slice(0..output_bytes))
789796
}
790797

791-
/// Export a standard Vortex list as Arrow `List`: validity, offsets, and one child array.
798+
/// Export a Vortex list-view as Arrow `List`.
792799
async fn export_list_view(
793800
listview: ListViewArray,
794801
ctx: &mut CudaExecutionCtx,
@@ -815,29 +822,12 @@ async fn export_list_view(
815822
.await
816823
}
817824

825+
/// Export a standard Vortex list as Arrow `List`: validity, offsets, and one child array.
818826
async fn export_list(
819827
array: ListArray,
820828
child_export: ListChildExport,
821829
ctx: &mut CudaExecutionCtx,
822830
) -> VortexResult<(ArrowArray, SyncEvent)> {
823-
let (elements, len, validity_buffer, null_count, offsets_buffer) =
824-
list_layout_parts(array, ctx).await?;
825-
export_list_layout(
826-
elements,
827-
len,
828-
validity_buffer,
829-
null_count,
830-
offsets_buffer,
831-
child_export,
832-
ctx,
833-
)
834-
.await
835-
}
836-
837-
async fn list_layout_parts(
838-
array: ListArray,
839-
ctx: &mut CudaExecutionCtx,
840-
) -> VortexResult<(ArrayRef, usize, Option<BufferHandle>, i64, BufferHandle)> {
841831
let len = array.len();
842832
let ListDataParts {
843833
elements,
@@ -848,7 +838,16 @@ async fn list_layout_parts(
848838

849839
let (validity_buffer, null_count) = export_arrow_validity_buffer(validity, len, 0, ctx).await?;
850840
let offsets_buffer = export_arrow_list_offsets(offsets, ctx).await?;
851-
Ok((elements, len, validity_buffer, null_count, offsets_buffer))
841+
export_list_layout(
842+
elements,
843+
len,
844+
validity_buffer,
845+
null_count,
846+
offsets_buffer,
847+
child_export,
848+
ctx,
849+
)
850+
.await
852851
}
853852

854853
#[derive(Clone, Copy)]
@@ -858,6 +857,11 @@ pub(super) enum ListChildExport {
858857
PreserveConcreteLayout,
859858
/// Canonicalize temporary encodings introduced by the host ListView
860859
/// rebuild, while still preserving rebuilt dictionary children.
860+
///
861+
/// This is not equivalent to `export_array`: the take-based rebuild wraps
862+
/// children in transient encodings (for example `take` returns `Dict`
863+
/// arrays nested inside struct fields) that the pre-computed export schema
864+
/// does not include, so they must be canonicalized away before export.
861865
RebuiltListViewChild,
862866
}
863867

@@ -891,24 +895,7 @@ pub(super) async fn export_list_layout(
891895
ctx: &mut CudaExecutionCtx,
892896
) -> VortexResult<(ArrowArray, SyncEvent)> {
893897
let elements_child = child_export.export(elements, ctx).await?;
894-
export_list_layout_with_child(
895-
elements_child,
896-
len,
897-
validity_buffer,
898-
null_count,
899-
offsets_buffer,
900-
ctx,
901-
)
902-
}
903898

904-
fn export_list_layout_with_child(
905-
elements_child: ArrowArray,
906-
len: usize,
907-
validity_buffer: Option<BufferHandle>,
908-
null_count: i64,
909-
offsets_buffer: BufferHandle,
910-
ctx: &mut CudaExecutionCtx,
911-
) -> VortexResult<(ArrowArray, SyncEvent)> {
912899
let mut private_data = PrivateData::new(
913900
vec![validity_buffer, Some(offsets_buffer)],
914901
vec![elements_child],

vortex-cuda/src/dynamic_dispatch/plan_builder.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,9 @@ fn is_dyn_dispatch_cast_compatible(array: &ArrayRef) -> bool {
139139
let Ok(source_ptype) = PType::try_from(cast.child_at(0).dtype()) else {
140140
return false;
141141
};
142-
let target_ptype = cast.scalar_fn().as_::<Cast>().as_ptype();
142+
let Ok(target_ptype) = PType::try_from(cast.scalar_fn().as_::<Cast>()) else {
143+
return false;
144+
};
143145

144146
// Implemented as unsigned dictionary-code casts to cuDF's signed index types.
145147
// LOAD/BITUNPACK materialize directly into the target-width output type.

0 commit comments

Comments
 (0)