Skip to content

Commit e087608

Browse files
committed
Tighten docs and flag aliasing follow-up
1 parent 612774f commit e087608

4 files changed

Lines changed: 47 additions & 21 deletions

File tree

crates/duckdb/src/core/data_chunk.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,22 @@ impl Drop for DataChunkHandle {
2626
}
2727

2828
impl DataChunkHandle {
29-
#[allow(dead_code)]
29+
/// Wrap a `duckdb_data_chunk` pointer owned elsewhere (e.g. by a DuckDB
30+
/// callback frame) without taking ownership.
31+
///
32+
/// # Safety
33+
///
34+
/// `ptr` must be a valid `duckdb_data_chunk` that remains allocated and
35+
/// usable for the full lifetime of the returned handle. The caller must
36+
/// ensure DuckDB does not destroy the chunk, free any column vectors
37+
/// reachable from it, or concurrently mutate the same chunk/vector state
38+
/// while this handle or any vectors derived from it are in use.
39+
///
40+
/// TODO(#673 follow-up): this handle's `flat_vector` / `list_vector` /
41+
/// `array_vector` / `struct_vector` accessors currently take `&self` and
42+
/// return writable wrappers, so safe code can obtain two wrappers over
43+
/// the same column and produce aliased `&mut [T]` slices.
44+
#[allow(dead_code)] // used only when `vtab` / `vscalar` features are enabled
3045
pub(crate) unsafe fn new_unowned(ptr: duckdb_data_chunk) -> Self {
3146
Self { ptr, owned: false }
3247
}

crates/duckdb/src/core/vector.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ use crate::ffi::{
1717
/// A flat vector borrowed from a [`DataChunkHandle`].
1818
///
1919
/// The `'a` lifetime ties the vector to the chunk it was obtained from,
20-
/// preventing the chunk from being dropped while the vector is still alive
21-
/// (see <https://github.com/duckdb/duckdb-rs/issues/673>).
20+
/// preventing the chunk from being dropped while the vector is still alive.
2221
pub struct FlatVector<'a> {
2322
ptr: duckdb_vector,
2423
capacity: usize,
@@ -166,8 +165,7 @@ impl Inserter<&Vec<u8>> for FlatVector<'_> {
166165
/// A list vector borrowed from a [`DataChunkHandle`].
167166
///
168167
/// The `'a` lifetime ties the vector to the chunk it was obtained from,
169-
/// preventing the chunk from being dropped while the vector is still alive
170-
/// (see <https://github.com/duckdb/duckdb-rs/issues/673>).
168+
/// preventing the chunk from being dropped while the vector is still alive.
171169
///
172170
/// Regression test for the use-after-free in issue #673: a `ListVector`
173171
/// must not be allowed to outlive its parent `DataChunkHandle`.
@@ -279,11 +277,10 @@ impl<'a> ListVector<'a> {
279277
}
280278
}
281279

282-
/// A array vector (fixed-size list) borrowed from a [`DataChunkHandle`].
280+
/// An array vector (fixed-size list) borrowed from a [`DataChunkHandle`].
283281
///
284282
/// The `'a` lifetime ties the vector to the chunk it was obtained from,
285-
/// preventing the chunk from being dropped while the vector is still alive
286-
/// (see <https://github.com/duckdb/duckdb-rs/issues/673>).
283+
/// preventing the chunk from being dropped while the vector is still alive.
287284
pub struct ArrayVector<'a> {
288285
ptr: duckdb_vector,
289286
_phantom: PhantomData<&'a ()>,
@@ -338,8 +335,7 @@ impl<'a> ArrayVector<'a> {
338335
/// A struct vector borrowed from a [`DataChunkHandle`].
339336
///
340337
/// The `'a` lifetime ties the vector to the chunk it was obtained from,
341-
/// preventing the chunk from being dropped while the vector is still alive
342-
/// (see <https://github.com/duckdb/duckdb-rs/issues/673>).
338+
/// preventing the chunk from being dropped while the vector is still alive.
343339
pub struct StructVector<'a> {
344340
ptr: duckdb_vector,
345341
_phantom: PhantomData<&'a ()>,

crates/duckdb/src/vscalar/mod.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,19 @@ pub trait VScalar: Sized {
2727
/// Shared across worker threads and invocations — must not be modified during execution.
2828
/// Must be `'static` as it is stored in DuckDB and may outlive the current stack frame.
2929
type State: Sized + Send + Sync + 'static;
30-
/// The actual function
30+
/// The actual function.
3131
///
32-
/// # Safety
33-
///
34-
/// This function is unsafe because it:
32+
/// DuckDB guarantees that `input` and `output` stay live for the
33+
/// duration of this call. Implementations are expected to populate
34+
/// `output` for rows `0..input.len()` and must not read or write beyond
35+
/// that range.
3536
///
36-
/// - Dereferences multiple raw pointers (`func`).
37+
/// # Safety
3738
///
39+
/// This method is `unsafe` because the `input` chunk and `output` vector
40+
/// wrap raw DuckDB pointers (see [`DataChunkHandle::new_unowned`] and the
41+
/// `WritableVector` impl on `duckdb_vector`). Callers must guarantee those
42+
/// pointers are valid DuckDB-owned storage for the duration of the call.
3843
unsafe fn invoke(
3944
state: &Self::State,
4045
input: &mut DataChunkHandle,

crates/duckdb/src/vtab/arrow.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -636,26 +636,36 @@ pub fn write_arrow_array_to_vector(
636636
Ok(())
637637
}
638638

639+
// `duckdb_vector` is a `Copy` raw pointer typedef, so the `'_` lifetime on the
640+
// returned wrappers below is tied to `&mut self` (i.e. to the local variable
641+
// holding the pointer, not to the underlying C++ vector). This prevents reusing
642+
// the same `duckdb_vector` handle while a wrapper is in flight, but it does **not**
643+
// track the C++ vector's liveness. Callers must independently guarantee that the
644+
// underlying DuckDB vector outlives the returned wrapper.
645+
//
646+
// TODO(#673 follow-up): this `impl` is reachable from safe downstream code
647+
// because `WritableVector` is a public safe trait with a public impl for the
648+
// raw `duckdb_vector` pointer type. Marking the trait `unsafe` (or removing
649+
// the raw-pointer impl entirely) would move this obligation into the type
650+
// system instead of relying on caller discipline.
639651
impl WritableVector for duckdb_vector {
640652
fn array_vector(&mut self) -> ArrayVector<'_> {
641-
// SAFETY: the returned vector borrows from `&mut self`, so it cannot
642-
// outlive this `duckdb_vector` handle. The caller is responsible for
643-
// ensuring the underlying C++ vector outlives this handle.
653+
// SAFETY: see module note above `impl WritableVector for duckdb_vector`.
644654
unsafe { ArrayVector::from_raw(*self) }
645655
}
646656

647657
fn flat_vector(&mut self) -> FlatVector<'_> {
648-
// SAFETY: see `array_vector`.
658+
// SAFETY: see module note above `impl WritableVector for duckdb_vector`.
649659
unsafe { FlatVector::from_raw(*self) }
650660
}
651661

652662
fn list_vector(&mut self) -> ListVector<'_> {
653-
// SAFETY: see `array_vector`.
663+
// SAFETY: see module note above `impl WritableVector for duckdb_vector`.
654664
unsafe { ListVector::from_raw(*self) }
655665
}
656666

657667
fn struct_vector(&mut self) -> StructVector<'_> {
658-
// SAFETY: see `array_vector`.
668+
// SAFETY: see module note above `impl WritableVector for duckdb_vector`.
659669
unsafe { StructVector::from_raw(*self) }
660670
}
661671
}

0 commit comments

Comments
 (0)