Skip to content

Commit 4c25ac7

Browse files
authored
Fix unsound vector slice API (#745)
By making typed vector slices unsafe. **Breaking**: `FlatVector::{as_slice, as_slice_with_len, as_mut_slice, as_mut_slice_with_len, copy}`, `ListVector::set_child`, and `ArrayVector::set_child` are now unsafe, requiring downstream callers to update call sites. rustsec/advisory-db#2767
2 parents 8aa176a + e992d25 commit 4c25ac7

5 files changed

Lines changed: 101 additions & 55 deletions

File tree

crates/duckdb/src/core/data_chunk.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ mod test {
116116
fn test_vector() {
117117
let datachunk = DataChunkHandle::new(&[LogicalTypeHandle::from(LogicalTypeId::Bigint)]);
118118
let mut vector = datachunk.flat_vector(0);
119-
let data = vector.as_mut_slice::<i64>();
119+
let data = unsafe { vector.as_mut_slice::<i64>() };
120120

121121
data[0] = 42;
122122
}

crates/duckdb/src/core/vector.rs

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,28 +76,57 @@ impl<'a> FlatVector<'a> {
7676
!valid
7777
}
7878

79-
/// Returns an unsafe mutable pointer to the vector’s
80-
pub fn as_mut_ptr<T>(&self) -> *mut T {
79+
/// Returns a mutable pointer to the vector's backing data cast to `T`.
80+
///
81+
/// # Safety
82+
/// The caller must ensure `T` matches the DuckDB vector's physical storage.
83+
/// Before dereferencing the pointer, the caller must also ensure the target
84+
/// element is initialized, in bounds, and not accessed through another
85+
/// incompatible or aliased reference.
86+
pub unsafe fn as_mut_ptr<T>(&self) -> *mut T {
8187
unsafe { duckdb_vector_get_data(self.ptr).cast() }
8288
}
8389

84-
/// Returns a slice of the vector
85-
pub fn as_slice<T>(&self) -> &[T] {
90+
/// Returns a typed slice of the vector.
91+
///
92+
/// # Safety
93+
/// The caller must ensure `T` matches the DuckDB vector's physical storage,
94+
/// the backing allocation is sized for `self.capacity()` elements of `T`,
95+
/// each returned element is initialized before it is read, and no `&mut`
96+
/// references to the same storage exist for the returned lifetime.
97+
pub unsafe fn as_slice<T>(&self) -> &[T] {
8698
unsafe { slice::from_raw_parts(self.as_mut_ptr(), self.capacity()) }
8799
}
88100

89-
/// Returns a slice of the vector up to a certain length
90-
pub fn as_slice_with_len<T>(&self, len: usize) -> &[T] {
101+
/// Returns a typed slice of the vector up to a certain length.
102+
///
103+
/// # Safety
104+
/// The caller must ensure `T` matches the DuckDB vector's physical storage,
105+
/// the backing allocation is sized for at least `len` elements of `T`, each
106+
/// returned element is initialized before it is read, and no `&mut`
107+
/// references to the same storage exist for the returned lifetime.
108+
pub unsafe fn as_slice_with_len<T>(&self, len: usize) -> &[T] {
91109
unsafe { slice::from_raw_parts(self.as_mut_ptr(), len) }
92110
}
93111

94-
/// Returns a mutable slice of the vector
95-
pub fn as_mut_slice<T>(&mut self) -> &mut [T] {
112+
/// Returns a typed mutable slice of the vector.
113+
///
114+
/// # Safety
115+
/// The caller must ensure `T` matches the DuckDB vector's physical storage,
116+
/// the backing allocation is sized for `self.capacity()` elements of `T`,
117+
/// and no other references to the same storage exist for the returned
118+
/// lifetime.
119+
pub unsafe fn as_mut_slice<T>(&mut self) -> &mut [T] {
96120
unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.capacity()) }
97121
}
98122

99-
/// Returns a mutable slice of the vector up to a certain length
100-
pub fn as_mut_slice_with_len<T>(&mut self, len: usize) -> &mut [T] {
123+
/// Returns a typed mutable slice of the vector up to a certain length.
124+
///
125+
/// # Safety
126+
/// The caller must ensure `T` matches the DuckDB vector's physical storage,
127+
/// the backing allocation is sized for at least `len` elements of `T`, and
128+
/// no other references to the same storage exist for the returned lifetime.
129+
pub unsafe fn as_mut_slice_with_len<T>(&mut self, len: usize) -> &mut [T] {
101130
unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
102131
}
103132

@@ -115,10 +144,17 @@ impl<'a> FlatVector<'a> {
115144
}
116145
}
117146

118-
/// Copy data to the vector.
119-
pub fn copy<T: Copy>(&mut self, data: &[T]) {
147+
/// Copy typed data to the vector.
148+
///
149+
/// # Safety
150+
/// The caller must ensure `T` matches the DuckDB vector's physical storage
151+
/// and no other references to the same storage exist during the copy.
152+
///
153+
/// # Panics
154+
/// Panics if `data.len()` exceeds the vector capacity.
155+
pub unsafe fn copy<T: Copy>(&mut self, data: &[T]) {
120156
assert!(data.len() <= self.capacity());
121-
self.as_mut_slice::<T>()[0..data.len()].copy_from_slice(data);
157+
(unsafe { self.as_mut_slice::<T>() })[0..data.len()].copy_from_slice(data);
122158
}
123159
}
124160

@@ -239,20 +275,25 @@ impl<'a> ListVector<'a> {
239275
}
240276

241277
/// Set primitive data to the child node.
242-
pub fn set_child<T: Copy>(&self, data: &[T]) {
243-
self.child(data.len()).copy(data);
278+
///
279+
/// # Safety
280+
/// The caller must ensure `T` matches the child vector's physical storage
281+
/// and no other references to the same storage exist during the copy.
282+
pub unsafe fn set_child<T: Copy>(&self, data: &[T]) {
283+
unsafe { self.child(data.len()).copy(data) };
244284
self.set_len(data.len());
245285
}
246286

247287
/// Set offset and length to the entry.
248288
pub fn set_entry(&mut self, idx: usize, offset: usize, length: usize) {
249-
self.entries.as_mut_slice::<duckdb_list_entry>()[idx].offset = offset as u64;
250-
self.entries.as_mut_slice::<duckdb_list_entry>()[idx].length = length as u64;
289+
let entries = unsafe { self.entries.as_mut_slice::<duckdb_list_entry>() };
290+
entries[idx].offset = offset as u64;
291+
entries[idx].length = length as u64;
251292
}
252293

253294
/// Get offset and length for the entry at index.
254295
pub fn get_entry(&self, idx: usize) -> (usize, usize) {
255-
let entry = self.entries.as_slice::<duckdb_list_entry>()[idx];
296+
let entry = (unsafe { self.entries.as_slice::<duckdb_list_entry>() })[idx];
256297
(entry.offset as usize, entry.length as usize)
257298
}
258299

@@ -320,8 +361,12 @@ impl<'a> ArrayVector<'a> {
320361
}
321362

322363
/// Set primitive data to the child node.
323-
pub fn set_child<T: Copy>(&self, data: &[T]) {
324-
self.child(data.len()).copy(data);
364+
///
365+
/// # Safety
366+
/// The caller must ensure `T` matches the child vector's physical storage
367+
/// and no other references to the same storage exist during the copy.
368+
pub unsafe fn set_child<T: Copy>(&self, data: &[T]) {
369+
unsafe { self.child(data.len()).copy(data) };
325370
}
326371

327372
/// Set row as null

crates/duckdb/src/vscalar/mod.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,8 @@ mod test {
237237
input: &mut DataChunkHandle,
238238
_: &mut dyn WritableVector,
239239
) -> Result<(), Box<dyn std::error::Error>> {
240-
let mut msg = input.flat_vector(0).as_slice_with_len::<duckdb_string_t>(input.len())[0];
240+
let vector = input.flat_vector(0);
241+
let mut msg = unsafe { vector.as_slice_with_len::<duckdb_string_t>(input.len()) }[0];
241242
let string = DuckString::new(&mut msg).as_str();
242243
Err(format!("Error: {string}").into())
243244
}
@@ -276,7 +277,7 @@ mod test {
276277
output: &mut dyn WritableVector,
277278
) -> Result<(), Box<dyn std::error::Error>> {
278279
let values = input.flat_vector(0);
279-
let values = values.as_slice_with_len::<duckdb_string_t>(input.len());
280+
let values = unsafe { values.as_slice_with_len::<duckdb_string_t>(input.len()) };
280281
let strings = values
281282
.iter()
282283
.map(|ptr| DuckString::new(&mut { *ptr }).as_str().to_string())
@@ -311,11 +312,11 @@ mod test {
311312
let output = output.flat_vector();
312313
let counts = input.flat_vector(1);
313314
let values = input.flat_vector(0);
314-
let values = values.as_slice_with_len::<duckdb_string_t>(input.len());
315+
let values = unsafe { values.as_slice_with_len::<duckdb_string_t>(input.len()) };
315316
let strings = values
316317
.iter()
317318
.map(|ptr| DuckString::new(&mut { *ptr }).as_str().to_string());
318-
let counts = counts.as_slice_with_len::<i32>(input.len());
319+
let counts = unsafe { counts.as_slice_with_len::<i32>(input.len()) };
319320
for (count, value) in counts.iter().zip(strings).take(input.len()) {
320321
output.insert(0, value.repeat((*count) as usize).as_str());
321322
}
@@ -426,7 +427,7 @@ mod test {
426427
) -> Result<(), Box<dyn std::error::Error>> {
427428
let len = input.len();
428429
let mut output_vec = output.flat_vector();
429-
let data = output_vec.as_mut_slice::<i64>();
430+
let data = unsafe { output_vec.as_mut_slice::<i64>() };
430431

431432
for item in data.iter_mut().take(len) {
432433
*item = NON_VOLATILE_COUNTER.fetch_add(1, Ordering::SeqCst) as i64;
@@ -454,7 +455,7 @@ mod test {
454455
) -> Result<(), Box<dyn std::error::Error>> {
455456
let len = input.len();
456457
let mut output_vec = output.flat_vector();
457-
let data = output_vec.as_mut_slice::<i64>();
458+
let data = unsafe { output_vec.as_mut_slice::<i64>() };
458459

459460
for item in data.iter_mut().take(len) {
460461
*item = VOLATILE_COUNTER.fetch_add(1, Ordering::SeqCst) as i64;

0 commit comments

Comments
 (0)