Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 96 additions & 1 deletion vortex-cuda/benches/bitpacked_cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ use crate::common::TimedLaunchStrategy;

/// Number of values in each generated benchmark array; also multiplied by the
/// element size to derive the byte throughput reported to Criterion.
const N_ROWS: usize = 100_000_000;

/// Patch frequencies to benchmark, as `(fraction, label)` pairs.
///
/// The fraction is handed to `make_bitpacked_array_with_patches` as its
/// `patch_frequency` argument; the label is the human-readable percentage used
/// as the benchmark parameter in `BenchmarkId::new`.
const PATCH_FREQUENCIES: &[(f64, &str)] =
&[(0.001, "0.1%"), (0.01, "1%"), (0.05, "5%"), (0.10, "10%")];

/// Create a bit-packed array with the given bit width
fn make_bitpacked_array<T>(bit_width: u8, len: usize) -> BitPackedArray
where
Expand All @@ -55,6 +59,46 @@ where
.vortex_expect("failed to create BitPacked array")
}

/// Create a bit-packed array whose values periodically exceed the packed bit width.
///
/// `patch_frequency` is a fraction (0.0 to 1.0) indicating what proportion of values
/// should exceed the bit width and become patches. Patches are placed deterministically,
/// one every `floor(1 / patch_frequency)` values starting at index 0.
///
/// Edge cases:
/// - A frequency that is zero, negative or NaN produces an array with no oversized
///   values at all.
/// - A frequency of 1.0 or more makes every value oversized.
///
/// This function uses bit_width=6 internally since patch values need to exceed
/// the bit width but still fit in u8 for the `From<u8>` trait bound.
///
/// Panics (via `vortex_expect`) if `BitPackedArray::encode` returns an error.
fn make_bitpacked_array_with_patches<T>(len: usize, patch_frequency: f64) -> BitPackedArray
where
    T: NativePType + Add<Output = T> + From<u8>,
{
    // Use bit_width=6 so max packed value is 63, and patch values (64-255) fit in u8
    let bit_width: u8 = 6;
    let max_packed_val = (1u64 << bit_width) - 1; // 63

    // Deterministic patch placement: one oversized value every `interval` elements.
    // `None` means "never emit an oversized value". A sentinel such as `usize::MAX`
    // would still match index 0, because `0 % n == 0` for every non-zero `n`.
    let patch_interval: Option<usize> = if patch_frequency > 0.0 {
        // The float-to-int `as` cast truncates toward zero and saturates, so a
        // frequency above 1.0 would yield 0; clamp to 1 to avoid a modulo by zero
        // and to treat such inputs as "every value is a patch".
        Some(((1.0 / patch_frequency) as usize).max(1))
    } else {
        None
    };

    let values: Vec<T> = (0..len)
        .map(|i| match patch_interval {
            // Patch value: 128 exceeds 6-bit max (63)
            Some(interval) if i % interval == 0 => <T as From<u8>>::from(128),
            // Normal value that fits within 6 bits (0-63)
            _ => {
                let val = (i as u64 & max_packed_val) as u8;
                <T as From<u8>>::from(val)
            }
        })
        .collect();

    let primitive_array = PrimitiveArray::new(Buffer::from(values), NonNullable);
    BitPackedArray::encode(primitive_array.as_ref(), bit_width)
        .vortex_expect("failed to create BitPacked array with patches")
}

/// Generic benchmark function for a specific type and bit width
fn benchmark_bitunpack_typed<T>(c: &mut Criterion, bit_width: u8, type_name: &str)
where
Expand Down Expand Up @@ -100,7 +144,58 @@ fn benchmark_bitunpack(c: &mut Criterion) {
benchmark_bitunpack_typed::<u64>(c, 8, "u64");
}

criterion::criterion_group!(benches, benchmark_bitunpack);
/// Benchmarks CUDA execution of patched bit-packed arrays of element type `T`.
///
/// One Criterion group named `bitunpack_cuda_patched_<type_name>` is created, with a
/// sample size of 10 and a byte throughput of `N_ROWS * size_of::<T>()`. Inside it,
/// one benchmark is registered per entry of `PATCH_FREQUENCIES`, each running against
/// an array built by `make_bitpacked_array_with_patches`.
///
/// The duration handed back to Criterion is not measured with a wall clock here: it is
/// read from the `total_time_ns` counter of the `TimedLaunchStrategy` installed on the
/// execution context for that measurement.
fn benchmark_bitunpack_with_patches_typed<T>(c: &mut Criterion, type_name: &str)
where
    T: BitPacked + NativePType + DeviceRepr + Add<Output = T> + From<u8>,
    T::Physical: DeviceRepr,
{
    // Size of the fully unpacked output, used as the throughput denominator.
    let unpacked_bytes = (N_ROWS * size_of::<T>()) as u64;

    let mut group = c.benchmark_group(format!("bitunpack_cuda_patched_{}", type_name));
    group.sample_size(10);
    group.throughput(Throughput::Bytes(unpacked_bytes));

    for &(frequency, label) in PATCH_FREQUENCIES {
        // Build the input once per frequency, outside the measured closure.
        let packed = make_bitpacked_array_with_patches::<T>(N_ROWS, frequency);
        let bench_id = BenchmarkId::new("bitunpack_patched", label);

        group.bench_with_input(bench_id, &packed, |bencher, packed| {
            bencher.iter_custom(|iters| {
                // A fresh strategy per measurement, so its counter starts from the default.
                let strategy = TimedLaunchStrategy::default();
                // Keep a handle on the counter before the strategy moves into the context.
                let elapsed_ns = Arc::clone(&strategy.total_time_ns);

                let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())
                    .vortex_expect("failed to create execution context")
                    .with_launch_strategy(Arc::new(strategy));

                for _ in 0..iters {
                    block_on(packed.to_array().execute_cuda(&mut cuda_ctx)).unwrap();
                }

                Duration::from_nanos(elapsed_ns.load(Ordering::Relaxed))
            });
        });
    }

    group.finish();
}

fn benchmark_bitunpack_with_patches(c: &mut Criterion) {
benchmark_bitunpack_with_patches_typed::<u8>(c, "u8");
benchmark_bitunpack_with_patches_typed::<u16>(c, "u16");
benchmark_bitunpack_with_patches_typed::<u32>(c, "u32");
benchmark_bitunpack_with_patches_typed::<u64>(c, "u64");
}

// Register both benchmark entry points (plain bit-unpacking and bit-unpacking with
// patches) under a single Criterion group named `benches`.
criterion::criterion_group!(
benches,
benchmark_bitunpack,
benchmark_bitunpack_with_patches
);

// Generate the benchmark binary's `main` from the `benches` group.
// NOTE(review): `#[cuda_available]` presumably gates this on CUDA being available;
// confirm against the attribute's definition.
#[cuda_available]
criterion::criterion_main!(benches);
Expand Down
Loading