Skip to content

Commit b12f6b2

Browse files
committed
FSST kernel validity for byte_length; comparison OnPair kernel
Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 9444d20 commit b12f6b2

5 files changed

Lines changed: 195 additions & 2 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_array::IntoArray;
5+
use vortex_array::ValidityVTable;
6+
use vortex_array::arrays::ConstantArray;
7+
use vortex_array::builtins::ArrayBuiltins;
8+
use vortex_array::dtype::DType;
9+
use vortex_array::dtype::PType;
10+
use vortex_array::scalar::Scalar;
11+
use vortex_array::scalar_fn::fns::byte_length::ByteLengthKernel;
12+
use vortex_array::validity::Validity;
13+
14+
use crate::OnPair;
15+
use crate::OnPairArraySlotsExt;
16+
17+
/// `ByteLengthKernel` implementation for `OnPair`.
///
/// Builds the result from the array's `uncompressed_lengths()` and then re-applies the
/// array's own validity, so that null rows come back as null lengths.
impl ByteLengthKernel for OnPair {
18+
/// Returns the per-row byte lengths as a `U64` primitive array whose nullability matches
/// the input array's dtype.
///
/// Every path through this function yields `Ok(Some(..))`; failures from the cast, the
/// validity lookup, or the mask are propagated with `?`. The execution context is unused.
fn byte_length(
19+
array: vortex_array::ArrayView<'_, Self>,
20+
_ctx: &mut vortex_array::ExecutionCtx,
21+
) -> vortex_error::VortexResult<Option<vortex_array::ArrayRef>> {
22+
let nullable = array.dtype().nullability();
23+
let dtype = DType::Primitive(PType::U64, nullable);
24+
// The stored uncompressed lengths are non-nullable and may use an integer type narrower
// than u64, so cast them to the result dtype before applying validity.
25+
let lengths = array.uncompressed_lengths().cast(dtype.clone())?;
26+
Ok(Some(match OnPair::validity(array)? {
27+
// No nulls to account for: the cast lengths are already the answer.
Validity::NonNullable | Validity::AllValid => lengths,
28+
// Per-row validity: combine it with the lengths via `mask`.
// NOTE(review): presumably rows where `v` is false become null — confirm `mask` polarity.
Validity::Array(v) => lengths.mask(v)?,
29+
// Every row is null: return a constant null array of the same length and dtype.
Validity::AllInvalid => {
30+
ConstantArray::new(Scalar::null(dtype), lengths.len()).into_array()
31+
}
32+
}))
33+
}
34+
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_array::ArrayRef;
5+
use vortex_array::ArrayView;
6+
use vortex_array::ExecutionCtx;
7+
use vortex_array::IntoArray;
8+
use vortex_array::arrays::BoolArray;
9+
use vortex_array::arrays::ConstantArray;
10+
use vortex_array::builtins::ArrayBuiltins;
11+
use vortex_array::dtype::DType;
12+
use vortex_array::scalar::Scalar;
13+
use vortex_array::scalar_fn::fns::binary::CompareKernel;
14+
use vortex_array::scalar_fn::fns::operators::CompareOperator;
15+
use vortex_buffer::BitBuffer;
16+
use vortex_error::VortexResult;
17+
18+
use crate::OnPair;
19+
use crate::OnPairArraySlotsExt;
20+
21+
/// `CompareKernel` implementation for `OnPair`, specialised to comparisons against a constant
/// empty UTF-8 / binary value.
///
/// For that one case the answer only depends on whether each row's uncompressed length is
/// zero, so the comparison is answered from `uncompressed_lengths()`.
impl CompareKernel for OnPair {
22+
/// Compares `lhs` against `rhs` using `operator`.
///
/// Returns `Ok(None)` when `rhs` is not a constant, when the constant is neither `Utf8` nor
/// `Binary`, or when `is_empty()` on it is anything other than `Some(true)`.
/// NOTE(review): `Ok(None)` presumably tells the caller to fall back to another
/// implementation — confirm against the `CompareKernel` contract.
///
/// Otherwise returns a `BoolArray` whose validity is `lhs`'s validity combined with the
/// constant's nullability.
fn compare(
23+
lhs: ArrayView<'_, Self>,
24+
rhs: &ArrayRef,
25+
operator: CompareOperator,
26+
ctx: &mut ExecutionCtx,
27+
) -> VortexResult<Option<ArrayRef>> {
28+
// Only a constant right-hand side is handled here.
let Some(constant) = rhs.as_constant() else {
29+
return Ok(None);
30+
};
31+
// `is_empty` is an `Option<bool>`; only string-like dtypes are considered.
let is_empty = match constant.dtype() {
32+
DType::Utf8(_) => constant.as_utf8().is_empty(),
33+
DType::Binary(_) => constant.as_binary().is_empty(),
34+
_ => return Ok(None),
35+
};
36+
// Bail out unless the constant is known to be empty (this also rejects a `None` result).
if is_empty != Some(true) {
37+
return Ok(None);
38+
}
39+
40+
let lengths = lhs.uncompressed_lengths();
41+
// The bits below ignore nulls; null rows are handled by the validity attached to the
// `BoolArray` at the end.
let buffer = match operator {
42+
// every value is greater than or equal to an empty string
43+
CompareOperator::Gte => BitBuffer::new_set(lhs.len()),
44+
// no value is less than an empty string
45+
CompareOperator::Lt => BitBuffer::new_unset(lhs.len()),
46+
// Remaining operators: apply the same operator to the lengths against a constant zero
// of the lengths' own dtype, then execute the expression into a bit buffer.
_ => lengths
47+
.binary(
48+
ConstantArray::new(Scalar::zero_value(lengths.dtype()), lengths.len())
49+
.into_array(),
50+
operator.into(),
51+
)?
52+
.execute(ctx)?,
53+
};
54+
Ok(Some(
55+
BoolArray::new(
56+
buffer,
57+
// Result validity: the left-hand side's validity, widened by the constant's nullability.
lhs.validity()?
58+
.union_nullability(constant.dtype().nullability()),
59+
)
60+
.into_array(),
61+
))
62+
}
63+
}
64+
65+
// Tests for comparing OnPair-compressed string arrays against constant right-hand sides.
#[cfg(test)]
66+
mod tests {
67+
use std::sync::LazyLock;
68+
69+
use rstest::rstest;
70+
use vortex_array::IntoArray;
71+
use vortex_array::VortexSessionExecute;
72+
use vortex_array::arrays::BoolArray;
73+
use vortex_array::arrays::ConstantArray;
74+
use vortex_array::arrays::VarBinArray;
75+
use vortex_array::assert_arrays_eq;
76+
use vortex_array::builtins::ArrayBuiltins;
77+
use vortex_array::dtype::DType;
78+
use vortex_array::dtype::Nullability;
79+
use vortex_array::scalar::Scalar;
80+
use vortex_array::scalar_fn::fns::operators::Operator;
81+
use vortex_array::session::ArraySession;
82+
use vortex_error::VortexResult;
83+
use vortex_session::VortexSession;
84+
85+
use crate::compress::DEFAULT_DICT12_CONFIG;
86+
use crate::compress::onpair_compress;
87+
88+
// Session shared by the tests below, created lazily on first use.
static SESSION: LazyLock<VortexSession> =
89+
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
90+
91+
// Compares a non-nullable OnPair-compressed array `["", "a", "", "bbb"]` against a constant
// empty string, once per operator, and checks the expected boolean result for each row.
#[cfg_attr(miri, ignore)]
92+
#[rstest]
93+
#[case(Operator::Eq, [true, false, true, false])]
94+
#[case(Operator::NotEq, [false, true, false, true])]
95+
#[case(Operator::Gt, [false, true, false, true])]
96+
#[case(Operator::Gte, [true, true, true, true])]
97+
#[case(Operator::Lt, [false, false, false, false])]
98+
#[case(Operator::Lte, [true, false, true, false])]
99+
fn compare_empty_string(#[case] op: Operator, #[case] expected: [bool; 4]) -> VortexResult<()> {
100+
let input = VarBinArray::from_iter(
101+
[Some(""), Some("a"), Some(""), Some("bbb")],
102+
DType::Utf8(Nullability::NonNullable),
103+
);
104+
let arr = onpair_compress(&input, input.len(), input.dtype(), DEFAULT_DICT12_CONFIG)?
105+
.into_array();
106+
107+
let mut ctx = SESSION.create_execution_ctx();
108+
let result = arr
109+
.binary(ConstantArray::new("", input.len()).into_array(), op)?
110+
.execute::<BoolArray>(&mut ctx)?;
111+
assert_arrays_eq!(&result, &BoolArray::from_iter(expected));
112+
Ok(())
113+
}
114+
115+
// Nullable input `["", null, "x"]`:
// - `== ""` must keep the null row null and yield `[true, null, false]`;
// - `==` against a null constant must yield null for every row.
#[cfg_attr(miri, ignore)]
116+
#[test]
117+
fn compare_empty_string_nullable() -> VortexResult<()> {
118+
let input = VarBinArray::from_iter(
119+
[Some(""), None, Some("x")],
120+
DType::Utf8(Nullability::Nullable),
121+
);
122+
let arr = onpair_compress(&input, input.len(), input.dtype(), DEFAULT_DICT12_CONFIG)?
123+
.into_array();
124+
let mut ctx = SESSION.create_execution_ctx();
125+
126+
let eq_empty = arr
127+
.clone()
128+
.binary(ConstantArray::new("", arr.len()).into_array(), Operator::Eq)?
129+
.execute::<BoolArray>(&mut ctx)?;
130+
assert_arrays_eq!(
131+
&eq_empty,
132+
&BoolArray::from_iter([Some(true), None, Some(false)])
133+
);
134+
135+
let null_rhs =
136+
ConstantArray::new(Scalar::null(DType::Utf8(Nullability::Nullable)), arr.len());
137+
let eq_null = arr
138+
.binary(null_rhs.into_array(), Operator::Eq)?
139+
.execute::<BoolArray>(&mut ctx)?;
140+
assert_arrays_eq!(&eq_null, &BoolArray::from_iter([None::<bool>, None, None]));
141+
Ok(())
142+
}
143+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
mod byte_length;
45
mod cast;
6+
mod compare;
57
mod filter;
68
mod slice;

encodings/experimental/onpair/src/kernel.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@
33

44
use vortex_array::arrays::filter::FilterExecuteAdaptor;
55
use vortex_array::kernel::ParentKernelSet;
6+
use vortex_array::scalar_fn::fns::binary::CompareExecuteAdaptor;
7+
use vortex_array::scalar_fn::fns::byte_length::ByteLengthExecuteAdaptor;
68

79
use crate::OnPair;
810

911
// TODO: implement ListExecute & TakeExecute for OnPair
10-
pub(super) const PARENT_KERNELS: ParentKernelSet<OnPair> =
11-
ParentKernelSet::new(&[ParentKernelSet::lift(&FilterExecuteAdaptor(OnPair))]);
12+
/// Parent kernels registered for `OnPair`: the filter, compare and byte-length adaptors.
pub(super) const PARENT_KERNELS: ParentKernelSet<OnPair> = ParentKernelSet::new(&[
13+
ParentKernelSet::lift(&FilterExecuteAdaptor(OnPair)),
14+
ParentKernelSet::lift(&CompareExecuteAdaptor(OnPair)),
15+
ParentKernelSet::lift(&ByteLengthExecuteAdaptor(OnPair)),
16+
]);

vortex-array/src/scalar_fn/fns/byte_length.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use crate::arrays::varbinview::VarBinViewArrayExt;
2424
use crate::dtype::DType;
2525
use crate::dtype::Nullability;
2626
use crate::dtype::PType;
27+
use crate::expr::Expression;
2728
use crate::kernel::ExecuteParentKernel;
2829
use crate::scalar::Scalar;
2930
use crate::scalar_fn::Arity;
@@ -122,6 +123,14 @@ impl ScalarFnVTable for ByteLength {
122123
}
123124
}
124125

126+
/// Validity of a `byte_length` expression: delegates to the validity expression of its
/// first child (index 0). The options argument is ignored.
///
/// Always returns `Ok(Some(..))` unless computing the child's validity fails.
fn validity(
127+
&self,
128+
_: &Self::Options,
129+
expression: &Expression,
130+
) -> VortexResult<Option<Expression>> {
131+
Ok(Some(expression.child(0).validity()?))
132+
}
133+
125134
fn is_null_sensitive(&self, _options: &Self::Options) -> bool {
126135
false
127136
}

0 commit comments

Comments
 (0)