Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions crates/duckdb/src/core/data_chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,20 @@ impl Drop for DataChunkHandle {
}

impl DataChunkHandle {
#[allow(dead_code)]
/// Wrap a `duckdb_data_chunk` pointer owned elsewhere (e.g. by a DuckDB
/// callback frame) without taking ownership.
///
/// # Safety
///
/// `ptr` must be a valid `duckdb_data_chunk` that stays allocated and
/// unmutated by other code for the full lifetime of the returned handle
/// and any vectors derived from it.
//
// Known aliasing hole (#673 follow-up): `flat_vector` / `list_vector` /
// `array_vector` / `struct_vector` take `&self`, so safe code can get two
// writable wrappers over the same column and produce aliased `&mut [T]`
// slices.
#[allow(dead_code)] // used only when `vtab` / `vscalar` features are enabled
pub(crate) unsafe fn new_unowned(ptr: duckdb_data_chunk) -> Self {
Self { ptr, owned: false }
}
Expand All @@ -41,23 +54,23 @@ impl DataChunkHandle {
}

/// Get the vector at the specific column index: `idx`.
pub fn flat_vector(&self, idx: usize) -> FlatVector {
FlatVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) })
pub fn flat_vector(&self, idx: usize) -> FlatVector<'_> {
unsafe { FlatVector::from_raw(duckdb_data_chunk_get_vector(self.ptr, idx as u64)) }
}

/// Get a list vector from the column index.
pub fn list_vector(&self, idx: usize) -> ListVector {
ListVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) })
pub fn list_vector(&self, idx: usize) -> ListVector<'_> {
unsafe { ListVector::from_raw(duckdb_data_chunk_get_vector(self.ptr, idx as u64)) }
}

/// Get an array vector from the column index.
pub fn array_vector(&self, idx: usize) -> ArrayVector {
ArrayVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) })
pub fn array_vector(&self, idx: usize) -> ArrayVector<'_> {
unsafe { ArrayVector::from_raw(duckdb_data_chunk_get_vector(self.ptr, idx as u64)) }
}

/// Get struct vector at the column index: `idx`.
pub fn struct_vector(&self, idx: usize) -> StructVector {
StructVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) })
pub fn struct_vector(&self, idx: usize) -> StructVector<'_> {
unsafe { StructVector::from_raw(duckdb_data_chunk_get_vector(self.ptr, idx as u64)) }
}

/// Set the size of the data chunk
Expand Down
175 changes: 107 additions & 68 deletions crates/duckdb/src/core/vector.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
use std::{any::Any, ffi::CString, slice};
//! Borrowed vector wrappers over [`duckdb_vector`].
//!
//! Each wrapper type carries a lifetime `'a`. When obtained via a
//! [`DataChunkHandle`][crate::core::DataChunkHandle] accessor, `'a` is
//! bound to the chunk so the wrapper cannot outlive it. When built via
//! one of the `unsafe fn from_raw` constructors (including the raw
//! `duckdb_vector` path used by `vtab::arrow`), `'a` is caller-chosen and
//! must not exceed the span for which the DuckDB vector is actually valid — that path does not
//! track liveness in the type system.

use std::{ffi::CString, marker::PhantomData, slice};

use libduckdb_sys::{
DuckDbString, duckdb_array_type_array_size, duckdb_array_vector_get_child, duckdb_validity_row_is_valid,
Expand All @@ -14,42 +24,33 @@ use crate::ffi::{
duckdb_vector_get_validity, duckdb_vector_size,
};

/// Vector trait.
pub trait Vector {
/// Returns a reference to this trait object as its underlying `Any` type.
fn as_any(&self) -> &dyn Any;
/// Returns a mutable reference to this trait object as its underlying `Any` type.
fn as_mut_any(&mut self) -> &mut dyn Any;
}

/// A flat vector
pub struct FlatVector {
/// A flat (contiguous, scalar-row) vector borrowed from a
/// [`DataChunkHandle`][crate::core::DataChunkHandle].
pub struct FlatVector<'a> {
ptr: duckdb_vector,
capacity: usize,
_phantom: PhantomData<&'a ()>,
}

impl From<duckdb_vector> for FlatVector {
fn from(ptr: duckdb_vector) -> Self {
impl<'a> FlatVector<'a> {
/// Wrap a raw `duckdb_vector` pointer.
///
/// # Safety
/// `ptr` must be a valid `duckdb_vector` that remains valid for all of `'a`.
pub(crate) unsafe fn from_raw(ptr: duckdb_vector) -> Self {
Self {
ptr,
capacity: unsafe { duckdb_vector_size() as usize },
_phantom: PhantomData,
}
}
}

impl Vector for FlatVector {
fn as_any(&self) -> &dyn Any {
self
}

fn as_mut_any(&mut self) -> &mut dyn Any {
self
}
}

impl FlatVector {
fn with_capacity(ptr: duckdb_vector, capacity: usize) -> Self {
Self { ptr, capacity }
Self {
ptr,
capacity,
_phantom: PhantomData,
}
}

/// Returns the capacity of the vector
Expand Down Expand Up @@ -127,27 +128,27 @@ pub trait Inserter<T> {
fn insert(&self, index: usize, value: T);
}

impl Inserter<CString> for FlatVector {
impl Inserter<CString> for FlatVector<'_> {
fn insert(&self, index: usize, value: CString) {
unsafe {
duckdb_vector_assign_string_element(self.ptr, index as u64, value.as_ptr());
}
}
}

impl Inserter<&str> for FlatVector {
impl Inserter<&str> for FlatVector<'_> {
fn insert(&self, index: usize, value: &str) {
self.insert(index, value.as_bytes());
}
}

impl Inserter<&String> for FlatVector {
impl Inserter<&String> for FlatVector<'_> {
fn insert(&self, index: usize, value: &String) {
self.insert(index, value.as_str());
}
}

impl Inserter<&[u8]> for FlatVector {
impl Inserter<&[u8]> for FlatVector<'_> {
fn insert(&self, index: usize, value: &[u8]) {
let value_size = value.len();
unsafe {
Expand All @@ -162,27 +163,48 @@ impl Inserter<&[u8]> for FlatVector {
}
}

impl Inserter<&Vec<u8>> for FlatVector {
impl Inserter<&Vec<u8>> for FlatVector<'_> {
fn insert(&self, index: usize, value: &Vec<u8>) {
self.insert(index, value.as_slice());
}
}

/// A list vector.
pub struct ListVector {
/// A list vector borrowed from a [`DataChunkHandle`][crate::core::DataChunkHandle].
///
/// Stores list entry offsets and lengths; elements live in a separate child vector.
///
/// Regression guard for #673 — a `ListVector` must not outlive its parent chunk:
///
/// ```compile_fail
/// use duckdb::core::{DataChunkHandle, LogicalTypeHandle, LogicalTypeId};
///
/// let vec;
/// {
/// let list_type = LogicalTypeHandle::list(&LogicalTypeId::Integer.into());
/// let chunk = DataChunkHandle::new(&[list_type]);
/// chunk.set_len(1);
/// let v = chunk.list_vector(0);
/// vec = v;
/// }
/// // chunk goes out of scope here; borrow checking rejects the outer use.
/// let _ = vec.get_entry(0);
/// ```
pub struct ListVector<'a> {
/// ListVector does not own the vector pointer.
entries: FlatVector,
entries: FlatVector<'a>,
}

impl From<duckdb_vector> for ListVector {
fn from(ptr: duckdb_vector) -> Self {
impl<'a> ListVector<'a> {
/// Wrap a raw `duckdb_vector` pointer.
///
/// # Safety
/// `ptr` must be a valid `duckdb_vector` that remains valid for all of `'a`.
pub(crate) unsafe fn from_raw(ptr: duckdb_vector) -> Self {
Self {
entries: FlatVector::from(ptr),
entries: unsafe { FlatVector::from_raw(ptr) },
}
}
}

impl ListVector {
/// Returns the number of entries in the list vector.
pub fn len(&self) -> usize {
unsafe { duckdb_list_vector_get_size(self.entries.ptr) as usize }
Expand All @@ -195,25 +217,25 @@ impl ListVector {

/// Returns the child vector.
// TODO: not ideal interface. Where should we keep capacity.
pub fn child(&self, capacity: usize) -> FlatVector {
pub fn child(&self, capacity: usize) -> FlatVector<'a> {
self.reserve(capacity);
FlatVector::with_capacity(unsafe { duckdb_list_vector_get_child(self.entries.ptr) }, capacity)
}

/// Take the child as [StructVector].
pub fn struct_child(&self, capacity: usize) -> StructVector {
pub fn struct_child(&self, capacity: usize) -> StructVector<'a> {
self.reserve(capacity);
StructVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
unsafe { StructVector::from_raw(duckdb_list_vector_get_child(self.entries.ptr)) }
}

/// Take the child as [ArrayVector].
pub fn array_child(&self) -> ArrayVector {
ArrayVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
pub fn array_child(&self) -> ArrayVector<'a> {
unsafe { ArrayVector::from_raw(duckdb_list_vector_get_child(self.entries.ptr)) }
}

/// Take the child as [ListVector].
pub fn list_child(&self) -> Self {
Self::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
pub fn list_child(&self) -> ListVector<'a> {
unsafe { ListVector::from_raw(duckdb_list_vector_get_child(self.entries.ptr)) }
}

/// Set primitive data to the child node.
Expand Down Expand Up @@ -258,18 +280,27 @@ impl ListVector {
}
}

/// An array vector (fixed-size list).
pub struct ArrayVector {
/// A fixed-size list vector borrowed from a
/// [`DataChunkHandle`][crate::core::DataChunkHandle].
///
/// Exposes a fixed-width list whose child storage is contiguous across all rows.
pub struct ArrayVector<'a> {
ptr: duckdb_vector,
_phantom: PhantomData<&'a ()>,
}

impl From<duckdb_vector> for ArrayVector {
fn from(ptr: duckdb_vector) -> Self {
Self { ptr }
impl<'a> ArrayVector<'a> {
/// Wrap a raw `duckdb_vector` pointer.
///
/// # Safety
/// `ptr` must be a valid `duckdb_vector` that remains valid for all of `'a`.
pub(crate) unsafe fn from_raw(ptr: duckdb_vector) -> Self {
Self {
ptr,
_phantom: PhantomData,
}
}
}

impl ArrayVector {
/// Get the logical type of this ArrayVector.
pub fn logical_type(&self) -> LogicalTypeHandle {
unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
Expand All @@ -284,7 +315,7 @@ impl ArrayVector {
/// Returns the child vector.
/// capacity should be a multiple of the array size.
// TODO: not ideal interface. Where should we keep count.
pub fn child(&self, capacity: usize) -> FlatVector {
pub fn child(&self, capacity: usize) -> FlatVector<'a> {
FlatVector::with_capacity(unsafe { duckdb_array_vector_get_child(self.ptr) }, capacity)
}

Expand All @@ -303,39 +334,47 @@ impl ArrayVector {
}
}

/// A struct vector.
pub struct StructVector {
/// A struct vector borrowed from a [`DataChunkHandle`][crate::core::DataChunkHandle].
///
/// Groups one child vector per struct field, all sharing the same row count.
pub struct StructVector<'a> {
ptr: duckdb_vector,
_phantom: PhantomData<&'a ()>,
}

impl From<duckdb_vector> for StructVector {
fn from(ptr: duckdb_vector) -> Self {
Self { ptr }
impl<'a> StructVector<'a> {
/// Wrap a raw `duckdb_vector` pointer.
///
/// # Safety
/// `ptr` must be a valid `duckdb_vector` that remains valid for all of `'a`.
pub(crate) unsafe fn from_raw(ptr: duckdb_vector) -> Self {
Self {
ptr,
_phantom: PhantomData,
}
}
}

impl StructVector {
/// Returns the child at index `idx` in the struct vector.
pub fn child(&self, idx: usize, capacity: usize) -> FlatVector {
pub fn child(&self, idx: usize, capacity: usize) -> FlatVector<'a> {
FlatVector::with_capacity(
unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) },
capacity,
)
}

/// Take the child as [StructVector].
pub fn struct_vector_child(&self, idx: usize) -> Self {
Self::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
pub fn struct_vector_child(&self, idx: usize) -> StructVector<'a> {
unsafe { StructVector::from_raw(duckdb_struct_vector_get_child(self.ptr, idx as u64)) }
}

/// Take the child as [ListVector].
pub fn list_vector_child(&self, idx: usize) -> ListVector {
ListVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
pub fn list_vector_child(&self, idx: usize) -> ListVector<'a> {
unsafe { ListVector::from_raw(duckdb_struct_vector_get_child(self.ptr, idx as u64)) }
}

/// Take the child as [ArrayVector].
pub fn array_vector_child(&self, idx: usize) -> ArrayVector {
ArrayVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
pub fn array_vector_child(&self, idx: usize) -> ArrayVector<'a> {
unsafe { ArrayVector::from_raw(duckdb_struct_vector_get_child(self.ptr, idx as u64)) }
}

/// Get the logical type of this struct vector.
Expand Down
19 changes: 15 additions & 4 deletions crates/duckdb/src/vscalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,25 @@ pub trait VScalar: Sized {
/// Shared across worker threads and invocations — must not be modified during execution.
/// Must be `'static` as it is stored in DuckDB and may outlive the current stack frame.
type State: Sized + Send + Sync + 'static;
/// The actual function
/// The actual function.
///
/// # Safety
/// DuckDB guarantees that `input` and `output` stay live for the duration
/// of this call. Implementations must populate `output` for rows
/// `0..input.len()` and must not read or write beyond that range.
///
/// This function is unsafe because it:
/// # Safety
///
/// - Dereferences multiple raw pointers (`func`).
/// Called by the DuckDB trampoline with wrappers over borrowed DuckDB
/// storage. Implementations must:
///
/// - only read and write within the rows and column types DuckDB provided
/// for this invocation;
/// - not retain `input`, `output`, or any vector/slice derived from them
/// past return;
/// - not hold two writable wrappers over the same column at the same
/// time. The wrapper types do not currently prevent this: calling e.g.
/// `input.flat_vector(0)` twice and then `as_mut_slice` on each yields
/// overlapping `&mut [T]`, which is undefined behavior.
unsafe fn invoke(
state: &Self::State,
input: &mut DataChunkHandle,
Expand Down
Loading
Loading