Skip to content

Commit 7fa584c

Browse files
authored
feat: N-ary CASE WHEN expressions (#20)
* N-ary
* fix: update threshold and value literals in n-ary CASE WHEN benchmark
* refactor: streamline context creation in benchmarks and improve test readability
1 parent 359a4d4 commit 7fa584c

3 files changed

Lines changed: 974 additions & 257 deletions

File tree

vortex-array/benches/expr/case_when_bench.rs

Lines changed: 102 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use vortex_array::expr::case_when;
1616
use vortex_array::expr::get_item;
1717
use vortex_array::expr::gt;
1818
use vortex_array::expr::lit;
19-
use vortex_array::expr::nested_case_when;
2019
use vortex_array::expr::root;
2120
use vortex_array::session::ArraySession;
2221
use vortex_array::validity::Validity;
@@ -45,100 +44,167 @@ fn make_struct_array(size: usize) -> ArrayRef {
4544
}
4645

4746
/// Benchmark a simple binary CASE WHEN with varying array sizes.
48-
#[divan::bench(args = [1000, 10000, 100000])]
47+
#[divan::bench(args = [10000, 100000, 1000000])]
4948
fn case_when_simple(bencher: Bencher, size: usize) {
5049
let array = make_struct_array(size);
5150

5251
// CASE WHEN value > 500 THEN 100 ELSE 0 END
53-
let expr = case_when(
52+
let expr = case_when([
5453
gt(get_item("value", root()), lit(500i32)),
5554
lit(100i32),
5655
lit(0i32),
57-
);
56+
]);
5857

5958
bencher
60-
.with_inputs(|| (&expr, &array))
61-
.bench_refs(|(expr, array)| {
62-
let mut ctx = SESSION.create_execution_ctx();
59+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
60+
.bench_refs(|(expr, array, ctx)| {
6361
array
6462
.apply(expr)
6563
.unwrap()
66-
.execute::<Canonical>(&mut ctx)
64+
.execute::<Canonical>(ctx)
6765
.unwrap()
6866
});
6967
}
7068

71-
/// Benchmark nested CASE WHEN with multiple conditions.
69+
/// Benchmark n-ary CASE WHEN with multiple conditions.
7270
#[divan::bench(args = [1000, 10000, 100000])]
73-
fn case_when_nested_3_conditions(bencher: Bencher, size: usize) {
71+
fn case_when_nary_3_conditions(bencher: Bencher, size: usize) {
7472
let array = make_struct_array(size);
7573

7674
// CASE WHEN value > 750 THEN 3 WHEN value > 500 THEN 2 WHEN value > 250 THEN 1 ELSE 0 END
77-
let expr = nested_case_when(
78-
vec![
79-
(gt(get_item("value", root()), lit(750i32)), lit(3i32)),
80-
(gt(get_item("value", root()), lit(500i32)), lit(2i32)),
81-
(gt(get_item("value", root()), lit(250i32)), lit(1i32)),
82-
],
83-
Some(lit(0i32)),
84-
);
75+
let expr = case_when([
76+
gt(get_item("value", root()), lit(750i32)),
77+
lit(3i32),
78+
gt(get_item("value", root()), lit(500i32)),
79+
lit(2i32),
80+
gt(get_item("value", root()), lit(250i32)),
81+
lit(1i32),
82+
lit(0i32),
83+
]);
8584

8685
bencher
87-
.with_inputs(|| (&expr, &array))
88-
.bench_refs(|(expr, array)| {
89-
let mut ctx = SESSION.create_execution_ctx();
86+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
87+
.bench_refs(|(expr, array, ctx)| {
9088
array
9189
.apply(expr)
9290
.unwrap()
93-
.execute::<Canonical>(&mut ctx)
91+
.execute::<Canonical>(ctx)
9492
.unwrap()
9593
});
9694
}
9795

9896
/// Benchmark CASE WHEN where the condition is true for every row (short-circuit path).
99-
#[divan::bench(args = [1000, 10000, 100000])]
97+
#[divan::bench(args = [10000, 100000, 1000000])]
10098
fn case_when_all_true(bencher: Bencher, size: usize) {
10199
let array = make_struct_array(size);
102100

103101
// CASE WHEN value > -1 THEN 100 ELSE 0 END (equivalent to value >= 0; always true for our data)
104-
let expr = case_when(
102+
let expr = case_when([
105103
gt(get_item("value", root()), lit(-1i32)),
106104
lit(100i32),
107105
lit(0i32),
108-
);
106+
]);
109107

110108
bencher
111-
.with_inputs(|| (&expr, &array))
112-
.bench_refs(|(expr, array)| {
113-
let mut ctx = SESSION.create_execution_ctx();
109+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
110+
.bench_refs(|(expr, array, ctx)| {
114111
array
115112
.apply(expr)
116113
.unwrap()
117-
.execute::<Canonical>(&mut ctx)
114+
.execute::<Canonical>(ctx)
118115
.unwrap()
119116
});
120117
}
121118

122119
/// Benchmark CASE WHEN where the condition is false for every row (short-circuit path).
123-
#[divan::bench(args = [1000, 10000, 100000])]
120+
#[divan::bench(args = [10000, 100000, 1000000])]
124121
fn case_when_all_false(bencher: Bencher, size: usize) {
125122
let array = make_struct_array(size);
126123

127124
// CASE WHEN value > 1000000 THEN 100 ELSE 0 END (always false for our data)
128-
let expr = case_when(
125+
let expr = case_when([
129126
gt(get_item("value", root()), lit(1_000_000i32)),
130127
lit(100i32),
131128
lit(0i32),
132-
);
129+
]);
130+
131+
bencher
132+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
133+
.bench_refs(|(expr, array, ctx)| {
134+
array
135+
.apply(expr)
136+
.unwrap()
137+
.execute::<Canonical>(ctx)
138+
.unwrap()
139+
});
140+
}
141+
142+
/// Benchmark n-ary CASE WHEN with 10 conditions.
143+
#[divan::bench(args = [1000, 10000, 100000])]
144+
fn case_when_nary_10_conditions(bencher: Bencher, size: usize) {
145+
let array = make_struct_array(size);
146+
147+
// Build 10 WHEN/THEN pairs with decreasing thresholds
148+
let expr = case_when([
149+
gt(get_item("value", root()), lit(900i32)),
150+
lit(10i32),
151+
gt(get_item("value", root()), lit(800i32)),
152+
lit(9i32),
153+
gt(get_item("value", root()), lit(700i32)),
154+
lit(8i32),
155+
gt(get_item("value", root()), lit(600i32)),
156+
lit(7i32),
157+
gt(get_item("value", root()), lit(500i32)),
158+
lit(6i32),
159+
gt(get_item("value", root()), lit(400i32)),
160+
lit(5i32),
161+
gt(get_item("value", root()), lit(300i32)),
162+
lit(4i32),
163+
gt(get_item("value", root()), lit(200i32)),
164+
lit(3i32),
165+
gt(get_item("value", root()), lit(100i32)),
166+
lit(2i32),
167+
gt(get_item("value", root()), lit(0i32)),
168+
lit(1i32),
169+
lit(0i32), // else
170+
]);
171+
172+
bencher
173+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
174+
.bench_refs(|(expr, array, ctx)| {
175+
array
176+
.apply(expr)
177+
.unwrap()
178+
.execute::<Canonical>(ctx)
179+
.unwrap()
180+
});
181+
}
182+
183+
/// Benchmark n-ary CASE WHEN with 100 conditions.
184+
#[divan::bench(args = [10000, 100000, 1000000])]
185+
fn case_when_nary_100_conditions(bencher: Bencher, size: usize) {
186+
use vortex_array::expr::Expression;
187+
188+
let array = make_struct_array(size);
189+
190+
// Build 100 WHEN/THEN pairs programmatically
191+
let mut children: Vec<Expression> = Vec::with_capacity(201);
192+
for i in (1..=100).rev() {
193+
let threshold = i * 10; // thresholds: 1000, 990, 980, ..., 10
194+
children.push(gt(get_item("value", root()), lit(threshold)));
195+
children.push(lit(i));
196+
}
197+
children.push(lit(0i32)); // else
198+
199+
let expr = case_when(children);
133200

134201
bencher
135-
.with_inputs(|| (&expr, &array))
136-
.bench_refs(|(expr, array)| {
137-
let mut ctx = SESSION.create_execution_ctx();
202+
.with_inputs(|| (&expr, &array, SESSION.create_execution_ctx()))
203+
.bench_refs(|(expr, array, ctx)| {
138204
array
139205
.apply(expr)
140206
.unwrap()
141-
.execute::<Canonical>(&mut ctx)
207+
.execute::<Canonical>(ctx)
142208
.unwrap()
143209
});
144210
}

0 commit comments

Comments
 (0)