Skip to content

Commit b555fd5

Browse files
joseph-isaacsclaude
authored andcommitted
perf: remove unneeded scalar_at and valid_at and use arrays instead
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent d650cab commit b555fd5

5 files changed

Lines changed: 72 additions & 40 deletions

File tree

vortex-array/src/arrays/listview/rebuild.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,14 @@ impl ListViewArray {
210210
let mut new_sizes = BufferMut::<S>::with_capacity(len);
211211
let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
212212

213-
let validity = self.validity()?;
213+
// Resolve validity to a mask once instead of probing it per row: `execute_is_valid`
214+
// executes a scalar on every call for array-backed validity, which is O(len) work repeated
215+
// `len` times.
216+
let validity = self.validity()?.execute_mask(len, ctx)?;
217+
214218
let mut n_elements = NewOffset::zero();
215219
for index in 0..len {
216-
if !validity.execute_is_valid(index, ctx)? {
220+
if !validity.value(index) {
217221
new_offsets.push(n_elements);
218222
new_sizes.push(S::zero());
219223
continue;
@@ -292,10 +296,12 @@ impl ListViewArray {
292296
let mut new_elements_builder =
293297
builder_with_capacity(element_dtype.as_ref(), self.elements().len());
294298

295-
let validity = self.validity()?;
299+
// Resolve validity to a mask once instead of probing it per row (see `rebuild_with_take`).
300+
let validity = self.validity()?.execute_mask(len, ctx)?;
301+
296302
let mut n_elements = NewOffset::zero();
297303
for index in 0..len {
298-
if !validity.execute_is_valid(index, ctx)? {
304+
if !validity.value(index) {
299305
// For NULL lists, place them after the previous item's data to maintain the
300306
// no-overlap invariant for zero-copy to `ListArray` arrays.
301307
new_offsets.push(n_elements);

vortex-array/src/arrays/varbin/compute/filter.rs

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use crate::arrays::varbin::builder::VarBinBuilder;
2424
use crate::dtype::DType;
2525
use crate::dtype::IntegerPType;
2626
use crate::match_each_integer_ptype;
27-
use crate::validity::Validity;
2827

2928
impl FilterKernel for VarBin {
3029
fn filter(
@@ -170,9 +169,11 @@ fn filter_select_var_bin_by_index(
170169
offsets.as_slice::<O>(),
171170
values.bytes().as_slice(),
172171
mask_indices,
173-
values.validity()?,
172+
values
173+
.varbin_validity()
174+
.execute_mask(values.as_ref().len(), ctx)
175+
.vortex_expect("Failed to compute validity mask"),
174176
selection_count,
175-
ctx,
176177
)
177178
})
178179
}
@@ -182,25 +183,37 @@ fn filter_select_var_bin_by_index_primitive_offset<O: IntegerPType>(
182183
offsets: &[O],
183184
data: &[u8],
184185
mask_indices: &[usize],
185-
// TODO(ngates): pass LogicalValidity instead
186-
validity: Validity,
186+
mask: Mask,
187187
selection_count: usize,
188-
ctx: &mut ExecutionCtx,
189188
) -> VortexResult<VarBinArray> {
189+
let value_at = |idx: usize| -> VortexResult<&[u8]> {
190+
let start = offsets[idx]
191+
.to_usize()
192+
.ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?;
193+
let end = offsets[idx + 1].to_usize().ok_or_else(|| {
194+
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
195+
})?;
196+
Ok(&data[start..end])
197+
};
198+
190199
let mut builder = VarBinBuilder::<O>::with_capacity(selection_count);
191-
for idx in mask_indices.iter().copied() {
192-
if validity.execute_is_valid(idx, ctx)? {
193-
let (start, end) = (
194-
offsets[idx].to_usize().ok_or_else(|| {
195-
vortex_err!("Failed to convert offset to usize: {}", offsets[idx])
196-
})?,
197-
offsets[idx + 1].to_usize().ok_or_else(|| {
198-
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
199-
})?,
200-
);
201-
builder.append_value(&data[start..end])
202-
} else {
203-
builder.append_null()
200+
match mask.bit_buffer() {
201+
AllOr::All => {
202+
for idx in mask_indices.iter().copied() {
203+
builder.append_value(value_at(idx)?)
204+
}
205+
}
206+
AllOr::None => {
207+
builder.append_n_nulls(selection_count);
208+
}
209+
AllOr::Some(validity) => {
210+
for idx in mask_indices.iter().copied() {
211+
if validity.value(idx) {
212+
builder.append_value(value_at(idx)?)
213+
} else {
214+
builder.append_null()
215+
}
216+
}
204217
}
205218
}
206219
Ok(builder.finish(dtype))

vortex-array/src/display/mod.rs

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -579,19 +579,22 @@ impl ArrayRef {
579579
}
580580
#[cfg(feature = "table-display")]
581581
DisplayOptions::TableDisplay => {
582+
use vortex_mask::Mask;
583+
584+
use crate::arrays::StructArray;
582585
use crate::arrays::struct_::StructArrayExt;
583-
#[expect(deprecated)]
584-
use crate::canonical::ToCanonical as _;
585586
use crate::dtype::DType;
586587

587588
let mut builder = tabled::builder::Builder::default();
589+
// Reuse a single execution context across all per-row accesses below.
590+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
588591

589592
// Special logic for struct arrays.
590593
let DType::Struct(sf, _) = self.dtype() else {
591594
// For non-struct arrays, simply display a single column table without header.
592595
for row_idx in 0..self.len() {
593596
let value = self
594-
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
597+
.execute_scalar(row_idx, &mut ctx)
595598
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
596599
builder.push_record([value]);
597600
}
@@ -602,22 +605,27 @@ impl ArrayRef {
602605
return write!(f, "{table}");
603606
};
604607

605-
#[expect(deprecated)]
606-
let struct_ = self.to_struct();
608+
let struct_ = match self.clone().execute::<StructArray>(&mut ctx) {
609+
Ok(struct_) => struct_,
610+
Err(e) => return write!(f, "<error: {e}>"),
611+
};
607612
builder.push_record(sf.names().iter().map(|name| name.to_string()));
608613

614+
// Resolve validity to a mask once instead of probing it per row.
615+
let validity = self
616+
.validity()
617+
.and_then(|v| v.execute_mask(self.len(), &mut ctx))
618+
.unwrap_or_else(|_| Mask::new_false(self.len()));
619+
609620
for row_idx in 0..self.len() {
610-
if !self
611-
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
612-
.unwrap_or(false)
613-
{
621+
if !validity.value(row_idx) {
614622
let null_row = vec!["null".to_string(); sf.names().len()];
615623
builder.push_record(null_row);
616624
} else {
617-
let mut row = Vec::new();
625+
let mut row = Vec::with_capacity(struct_.struct_fields().nfields());
618626
for field_array in StructArrayExt::iter_unmasked_fields(&struct_) {
619627
let value = field_array
620-
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
628+
.execute_scalar(row_idx, &mut ctx)
621629
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
622630
row.push(value);
623631
}
@@ -634,10 +642,7 @@ impl ArrayRef {
634642
}
635643

636644
for row_idx in 0..self.len() {
637-
if !self
638-
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
639-
.unwrap_or(false)
640-
{
645+
if !validity.value(row_idx) {
641646
table.modify(
642647
(1 + row_idx, 0),
643648
tabled::settings::Span::column(sf.names().len() as isize),

vortex-tensor/src/scalar_fns/l2_denorm.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,14 +441,18 @@ pub fn normalize_as_l2_denorm(
441441
let normalized_dtype = input.dtype().as_nonnullable();
442442
let flat = extract_flat_elements(input.storage_array(), tensor_flat_size, ctx)?;
443443

444+
// Resolve validity to a mask once rather than probing it per row (each `Validity::is_valid`
445+
// executes a scalar for array-backed validity).
446+
let norms_valid = norms_validity.execute_mask(row_count, ctx)?;
447+
444448
// Normalize all of the vectors.
445449
let normalized = match_each_float_ptype!(flat.ptype(), |T| {
446450
let norm_values = primitive_norms.as_slice::<T>();
447451

448452
let total_elements = row_count * tensor_flat_size;
449453
let mut elements = BufferMut::<T>::with_capacity(total_elements);
450454
for i in 0..row_count {
451-
let is_valid = norms_validity.execute_is_valid(i, ctx)?;
455+
let is_valid = norms_valid.value(i);
452456
let norm = norm_values[i];
453457

454458
// SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly
@@ -637,12 +641,14 @@ pub fn validate_l2_normalized_rows_against_norms(
637641
Some(norms) => normalized_validity.and(norms.validity()?)?,
638642
None => normalized_validity,
639643
};
644+
// Resolve validity to a mask once rather than probing it per row.
645+
let combined_valid = combined_validity.execute_mask(row_count, ctx)?;
640646

641647
match_each_float_ptype!(element_ptype, |T| {
642648
let stored_norms = norms.as_ref().map(|norms| norms.as_slice::<T>());
643649

644650
for i in 0..row_count {
645-
if !combined_validity.execute_is_valid(i, ctx)? {
651+
if !combined_valid.value(i) {
646652
continue;
647653
}
648654

vortex-tui/src/browse/ui/layouts.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ use ratatui::widgets::Table;
2727
use ratatui::widgets::Widget;
2828
use ratatui::widgets::Wrap;
2929
use vortex::array::ArrayRef;
30+
use vortex::array::Canonical;
31+
use vortex::array::IntoArray;
3032
use vortex::array::VortexSessionExecute;
3133
use vortex::array::arrays::StructArray;
3234
use vortex::array::arrays::struct_::StructArrayExt;

0 commit comments

Comments
 (0)