Skip to content

Commit e14125a

Browse files
committed
feat: teach ALPRDArray how to append_to_builder
The PrimitiveBuilder API is a bit awkward for this. I needed two key things: 1. I want to treat a trailing slice of the builder's values as a different type. In particular, ALPRD decodes u32s (respectively u64s) and then converts them to f32s (resp. f64s). 2. I want to separately update the nulls and the values. In particular, ALPRD stores the nulls on the left parts whereas the right parts hold the values. All of this is copacetic *when done on the UninitRange*, but I do it on the whole PrimitiveBuilder. I kinda think PrimitiveBuilder should be able to split itself into the initialized and uninitialized parts. In my opinion, it seems reasonable to transmute the builder of uninitialized parts since no one else is looking at that data anyway. Signed-off-by: Daniel King <dan@spiraldb.com>
1 parent 68ca4fa commit e14125a

File tree

5 files changed

+214
-31
lines changed

5 files changed

+214
-31
lines changed

encodings/alp/src/alp_rd/array.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ use vortex_array::ProstMetadata;
1919
use vortex_array::SerializeMetadata;
2020
use vortex_array::arrays::PrimitiveArray;
2121
use vortex_array::buffer::BufferHandle;
22+
use vortex_array::builders::ArrayBuilder;
23+
use vortex_array::builders::PrimitiveBuilder;
2224
use vortex_array::dtype::DType;
2325
use vortex_array::dtype::Nullability;
2426
use vortex_array::dtype::PType;
@@ -36,13 +38,16 @@ use vortex_array::vtable::ValidityChild;
3638
use vortex_array::vtable::ValidityVTableFromChild;
3739
use vortex_array::vtable::VisitorVTable;
3840
use vortex_buffer::Buffer;
41+
use vortex_error::VortexExpect as _;
3942
use vortex_error::VortexResult;
4043
use vortex_error::vortex_bail;
4144
use vortex_error::vortex_ensure;
4245
use vortex_error::vortex_err;
4346
use vortex_mask::Mask;
4447
use vortex_session::VortexSession;
4548

49+
use crate::ALPRDFloat;
50+
use crate::alp_rd::alp_rd_decode_inplace;
4651
use crate::alp_rd::kernel::PARENT_KERNELS;
4752
use crate::alp_rd::rules::RULES;
4853
use crate::alp_rd_decode;
@@ -222,6 +227,18 @@ impl VTable for ALPRDVTable {
222227
Ok(())
223228
}
224229

230+
fn append_to_builder(
231+
array: &Self::Array,
232+
builder: &mut dyn ArrayBuilder,
233+
ctx: &mut ExecutionCtx,
234+
) -> VortexResult<()> {
235+
if array.is_f32() {
236+
append_to_builder_typed::<f32>(array, builder, ctx)
237+
} else {
238+
append_to_builder_typed::<f64>(array, builder, ctx)
239+
}
240+
}
241+
225242
fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
226243
let left_parts = array.left_parts().clone().execute::<PrimitiveArray>(ctx)?;
227244
let right_parts = array.right_parts().clone().execute::<PrimitiveArray>(ctx)?;
@@ -282,6 +299,58 @@ impl VTable for ALPRDVTable {
282299
}
283300
}
284301

302+
fn append_to_builder_typed<T: ALPRDFloat>(
303+
array: &ALPRDArray,
304+
builder: &mut dyn ArrayBuilder,
305+
ctx: &mut ExecutionCtx,
306+
) -> VortexResult<()> {
307+
let left_parts = array.left_parts().clone().execute::<PrimitiveArray>(ctx)?;
308+
let validity = array
309+
.left_parts()
310+
.validity()?
311+
.to_array(array.len())
312+
.execute::<Mask>(ctx)?;
313+
314+
let builder: &mut PrimitiveBuilder<T> = builder
315+
.as_any_mut()
316+
.downcast_mut()
317+
.vortex_expect("ALPRD array must canonicalize into a primitive array");
318+
319+
let cursor = builder.len();
320+
let n = array.len();
321+
builder.reserve_exact(n);
322+
323+
builder.as_values_and_nulls(|non_nullable_builder, nulls| -> VortexResult<()> {
324+
// SAFETY: T and T::UINT have the same size/alignment (f32↔u32, f64↔u64).
325+
unsafe {
326+
non_nullable_builder.as_transmuted::<T::UINT, VortexResult<()>>(|uint_builder| {
327+
// BitPacked can downcast uint_builder to PrimitiveBuilder<T::UINT>.
328+
array.right_parts().append_to_builder(uint_builder, ctx)?;
329+
330+
// Decode in-place: combine left+right bits.
331+
let right_parts_slice = &mut uint_builder.values_mut()[cursor..cursor + n];
332+
alp_rd_decode_inplace::<T>(
333+
left_parts.into_buffer::<u16>(),
334+
array.left_parts_dictionary(),
335+
array.right_bit_width,
336+
right_parts_slice,
337+
array.left_parts_patches(),
338+
ctx,
339+
)?;
340+
341+
Ok(())
342+
})?;
343+
}
344+
345+
// Set the correct validity from left_parts.
346+
nulls.append_validity_mask(validity);
347+
348+
Ok(())
349+
})?;
350+
351+
Ok(())
352+
}
353+
285354
#[derive(Clone, Debug)]
286355
pub struct ALPRDArray {
287356
dtype: DType,

encodings/alp/src/alp_rd/mod.rs

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -294,13 +294,46 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
294294
left_parts: Buffer<u16>,
295295
left_parts_dict: &[u16],
296296
right_bit_width: u8,
297-
right_parts: BufferMut<T::UINT>,
297+
mut right_parts: BufferMut<T::UINT>,
298298
left_parts_patches: Option<&Patches>,
299299
ctx: &mut ExecutionCtx,
300300
) -> VortexResult<Buffer<T>> {
301-
if left_parts.len() != right_parts.len() {
302-
vortex_panic!("alp_rd_decode: left_parts.len != right_parts.len");
303-
}
301+
alp_rd_decode_inplace::<T>(
302+
left_parts,
303+
left_parts_dict,
304+
right_bit_width,
305+
right_parts.as_mut_slice(),
306+
left_parts_patches,
307+
ctx,
308+
)?;
309+
310+
// SAFETY: alp_rd_decode_inplace combined left+right bits in-place.
311+
// The buffer now contains valid T bit patterns stored as T::UINT.
312+
Ok(unsafe { right_parts.transmute::<T>() }.freeze())
313+
}
314+
315+
/// Decode ALP-RD encoded values in-place into a mutable slice of right parts.
316+
///
317+
/// After this function returns, `right_parts` contains valid `T` bit patterns
318+
/// (stored as `T::UINT`). Each element is the combination of the dictionary-decoded
319+
/// left part shifted left by `right_bit_width` bits, OR'd with the original right part.
320+
///
321+
/// # Panics
322+
///
323+
/// Panics if `left_parts.len() != right_parts.len()`.
324+
pub fn alp_rd_decode_inplace<T: ALPRDFloat>(
325+
left_parts: Buffer<u16>,
326+
left_parts_dict: &[u16],
327+
right_bit_width: u8,
328+
right_parts: &mut [T::UINT],
329+
left_parts_patches: Option<&Patches>,
330+
ctx: &mut ExecutionCtx,
331+
) -> VortexResult<()> {
332+
assert_eq!(
333+
left_parts.len(),
334+
right_parts.len(),
335+
"alp_rd_decode_inplace: left_parts.len != right_parts.len"
336+
);
304337

305338
// Decode the left-parts dictionary
306339
let mut values = BufferMut::<u16>::from_iter(
@@ -316,13 +349,13 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
316349
alp_rd_apply_patches(&mut values, &indices, &patch_values, patches.offset());
317350
}
318351

319-
// Shift the left-parts and add in the right-parts.
320-
Ok(alp_rd_decode_core(
321-
left_parts_dict,
322-
right_bit_width,
323-
right_parts,
324-
values,
325-
))
352+
// Combine left + right in-place: *right = (left << rbw) | *right
353+
for (right, left) in right_parts.iter_mut().zip(values.iter()) {
354+
let left_uint = <T as ALPRDFloat>::from_u16(*left);
355+
*right = (left_uint << (right_bit_width as usize)) | *right;
356+
}
357+
358+
Ok(())
326359
}
327360

328361
/// Apply patches to the decoded left-parts values.
@@ -343,25 +376,6 @@ fn alp_rd_apply_patches(
343376
})
344377
}
345378

346-
/// Core decode logic shared between `alp_rd_decode` and `execute_alp_rd_decode`.
347-
fn alp_rd_decode_core<T: ALPRDFloat>(
348-
_left_parts_dict: &[u16],
349-
right_bit_width: u8,
350-
right_parts: BufferMut<T::UINT>,
351-
values: BufferMut<u16>,
352-
) -> Buffer<T> {
353-
// Shift the left-parts and add in the right-parts.
354-
let mut index = 0;
355-
right_parts
356-
.map_each_in_place(|right| {
357-
let left = values[index];
358-
index += 1;
359-
let left = <T as ALPRDFloat>::from_u16(left);
360-
T::from_bits((left << (right_bit_width as usize)) | right)
361-
})
362-
.freeze()
363-
}
364-
365379
/// Find the best "cut point" for a set of floating point values such that we can
366380
/// cast them all to the relevant value instead.
367381
fn find_best_dictionary<T: ALPRDFloat>(samples: &[T]) -> ALPRDDictionary {

vortex-array/src/builders/lazy_null_builder.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ impl LazyBitBufferBuilder {
3131
}
3232
}
3333

34+
pub fn capacity(&self) -> usize {
35+
self.capacity
36+
}
37+
3438
/// Appends `n` non-null values to the builder.
3539
#[inline]
3640
pub fn append_n_non_nulls(&mut self, n: usize) {
@@ -105,6 +109,11 @@ impl LazyBitBufferBuilder {
105109
self.inner.as_ref().map(|i| i.len()).unwrap_or(self.len)
106110
}
107111

112+
/// Returns true if this builder contains no bits.
113+
pub fn is_empty(&self) -> bool {
114+
self.len() == 0
115+
}
116+
108117
fn finish(&mut self) -> Option<BitBuffer> {
109118
self.len = 0;
110119
self.inner.take().map(|b| b.freeze())

vortex-array/src/builders/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use crate::match_each_native_ptype;
4444
use crate::scalar::Scalar;
4545

4646
mod lazy_null_builder;
47-
pub(crate) use lazy_null_builder::LazyBitBufferBuilder;
47+
pub use lazy_null_builder::LazyBitBufferBuilder;
4848

4949
mod bool;
5050
mod decimal;

vortex-array/src/builders/primitive.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,97 @@ impl<T: NativePType> PrimitiveBuilder<T> {
118118
self.values.extend(iter);
119119
self.nulls.append_validity_mask(mask);
120120
}
121+
122+
/// Returns a mutable slice of the initialized values in this builder.
123+
pub fn values_mut(&mut self) -> &mut [T] {
124+
self.values.as_mut_slice()
125+
}
126+
127+
/// Temporarily detaches the nulls buffer, making this builder non-nullable
128+
/// for the duration of the closure.
129+
///
130+
/// The closure receives:
131+
/// - `&mut self` with dtype changed to non-nullable and nulls swapped out
132+
/// - The detached [`LazyBitBufferBuilder`] for managing validity separately
133+
///
134+
/// After the closure returns, the original nullability and nulls are restored.
135+
pub fn as_values_and_nulls<R>(
136+
&mut self,
137+
f: impl FnOnce(&mut Self, &mut LazyBitBufferBuilder) -> R,
138+
) -> R {
139+
let original_dtype = self.dtype.clone();
140+
141+
// Make the builder non-nullable.
142+
self.dtype = DType::Primitive(T::PTYPE, Nullability::NonNullable);
143+
144+
// Swap nulls out.
145+
let mut nulls = LazyBitBufferBuilder::new(0);
146+
std::mem::swap(&mut self.nulls, &mut nulls);
147+
148+
let result = f(self, &mut nulls);
149+
150+
// Restore dtype and nulls.
151+
self.dtype = original_dtype;
152+
std::mem::swap(&mut self.nulls, &mut nulls);
153+
154+
result
155+
}
156+
157+
/// Temporarily transmutes the values buffer from `T` to `U` and runs a
158+
/// closure with a `PrimitiveBuilder<U>`.
159+
///
160+
/// The temporary builder preserves this builder's nullability, and the nulls
161+
/// buffer is moved into it for the duration of the closure.
162+
///
163+
/// After the closure returns, values are transmuted back to `T` and the
164+
/// nulls buffer is restored.
165+
///
166+
/// # Safety
167+
///
168+
/// - `T` and `U` must have the same size and alignment.
169+
/// - After this call, any newly appended values contain `U` bit patterns.
170+
/// The caller must ensure these are valid `T` bit patterns before reading
171+
/// them as `T`.
172+
pub unsafe fn as_transmuted<U: NativePType, R>(
173+
&mut self,
174+
f: impl FnOnce(&mut PrimitiveBuilder<U>) -> R,
175+
) -> R {
176+
assert_eq!(
177+
size_of::<T>(),
178+
size_of::<U>(),
179+
"T and U must have the same size"
180+
);
181+
assert_eq!(
182+
align_of::<T>(),
183+
align_of::<U>(),
184+
"T and U must have the same alignment"
185+
);
186+
187+
let values = std::mem::take(&mut self.values);
188+
// SAFETY: We have verified that T and U have the same size and alignment.
189+
let retyped_values: BufferMut<U> = unsafe { values.transmute() };
190+
191+
// Swap nulls into the temp builder so it inherits our validity state.
192+
let mut nulls = LazyBitBufferBuilder::new(0);
193+
std::mem::swap(&mut self.nulls, &mut nulls);
194+
195+
let mut temp_builder = PrimitiveBuilder {
196+
dtype: DType::Primitive(U::PTYPE, self.dtype.nullability()),
197+
values: retyped_values,
198+
nulls,
199+
};
200+
201+
let result = f(&mut temp_builder);
202+
203+
// Restore values (transmute back to T).
204+
// SAFETY: Same size/alignment guarantee as above.
205+
self.values = unsafe { std::mem::take(&mut temp_builder.values).transmute() };
206+
207+
// Restore nulls.
208+
std::mem::swap(&mut self.nulls, &mut temp_builder.nulls);
209+
210+
result
211+
}
121212
}
122213

123214
impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {

0 commit comments

Comments
 (0)