Skip to content

Commit 8f06348

Browse files
committed
perf: Adjust TupleKey encoding to default to NULLS LAST, allowing Min/Max to be rewritten as TopK without filtering NULLs
1 parent 329b687 commit 8f06348

File tree

13 files changed

+135
-418
lines changed

13 files changed

+135
-418
lines changed

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ Run `make tpcc-dual` to mirror every TPCC statement to an in-memory SQLite datab
9393
All cases have been fully optimized.
9494
```shell
9595
<90th Percentile RT (MaxRT)>
96-
New-Order : 0.002 (0.006)
97-
Payment : 0.001 (0.019)
98-
Order-Status : 0.001 (0.003)
99-
Delivery : 0.022 (0.038)
100-
Stock-Level : 0.002 (0.005)
96+
New-Order : 0.002 (0.005)
97+
Payment : 0.001 (0.013)
98+
Order-Status : 0.002 (0.006)
99+
Delivery : 0.010 (0.023)
100+
Stock-Level : 0.002 (0.017)
101101
<TpmC>
102-
18432 Tpmc
102+
27226 Tpmc
103103
```
104104
#### 👉[check more](tpcc/README.md)
105105

src/binder/aggregate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ impl<T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'_, '_, T, A>
9898
return_orderby.push(SortField::new(
9999
expr,
100100
asc.is_none_or(|asc| asc),
101-
nulls_first.unwrap_or(true),
101+
nulls_first.unwrap_or(false),
102102
));
103103
}
104104
Some(return_orderby)

src/optimizer/core/memo.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ mod tests {
191191
let sort_fields = vec![SortField::new(
192192
ScalarExpression::column_expr(c1_column.clone()),
193193
true,
194-
true,
194+
false,
195195
)];
196196
let scala_functions = Default::default();
197197
let table_functions = Default::default();

src/optimizer/rule/normalization/agg_elimination.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ fn distinct_sort_fields(groupby_exprs: &[ScalarExpression]) -> Vec<SortField> {
179179
groupby_exprs
180180
.iter()
181181
.cloned()
182-
.map(|expr| SortField::new(expr, true, true))
182+
.map(|expr| SortField::new(expr, true, false))
183183
.collect()
184184
}
185185

@@ -349,7 +349,7 @@ mod tests {
349349

350350
fn make_sort_field(name: &str) -> SortField {
351351
let column = ColumnRef::from(ColumnCatalog::new_dummy(name.to_string()));
352-
SortField::new(ScalarExpression::column_expr(column), true, true)
352+
SortField::new(ScalarExpression::column_expr(column), true, false)
353353
}
354354

355355
fn build_plan(
@@ -427,7 +427,7 @@ mod tests {
427427
let sort_fields = vec![SortField::new(
428428
ScalarExpression::column_expr(c1.clone()),
429429
true,
430-
true,
430+
false,
431431
)];
432432
let sort_option = SortOption::OrderBy {
433433
fields: sort_fields.clone(),
@@ -523,7 +523,7 @@ mod tests {
523523
#[test]
524524
fn annotate_sets_sort_hint_on_table_scan() -> Result<(), DatabaseError> {
525525
let column = ColumnRef::from(ColumnCatalog::new_dummy("c1".to_string()));
526-
let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, true);
526+
let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, false);
527527
let (index_info, _) = build_index_info(vec![sort_field.clone()], 0);
528528

529529
let mut columns = BTreeMap::new();
@@ -625,7 +625,7 @@ mod tests {
625625
#[test]
626626
fn promote_index_to_remove_sort() -> Result<(), DatabaseError> {
627627
let column = ColumnRef::from(ColumnCatalog::new_dummy("c_first".to_string()));
628-
let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, true);
628+
let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, false);
629629
let (mut index_info, _) = build_index_info(vec![sort_field.clone()], 0);
630630
index_info.range = Some(Range::Scope {
631631
min: Bound::Unbounded,

src/optimizer/rule/normalization/min_max_top_k.rs

Lines changed: 3 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ use crate::expression::ScalarExpression;
1818
use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate};
1919
use crate::optimizer::core::rule::{MatchPattern, NormalizationRule};
2020
use crate::optimizer::plan_utils::{only_child, wrap_child_with};
21-
use crate::planner::operator::filter::FilterOperator;
2221
use crate::planner::operator::sort::SortField;
2322
use crate::planner::operator::top_k::TopKOperator;
2423
use crate::planner::operator::Operator;
@@ -60,7 +59,7 @@ impl NormalizationRule for MinMaxToTopK {
6059
_ => return Ok(false),
6160
};
6261

63-
let sort_field = SortField::new(args[0].clone(), asc, true);
62+
let sort_field = SortField::new(args[0].clone(), asc, false);
6463
let already_topk = match only_child(plan) {
6564
Some(child) => match &child.operator {
6665
Operator::TopK(topk) => {
@@ -77,23 +76,6 @@ impl NormalizationRule for MinMaxToTopK {
7776
return Ok(false);
7877
}
7978

80-
// IndexScan prioritizes indexed columns as null first.
81-
// Therefore, to ensure Top K is eliminated when an index exists,
82-
// we set it to null first and filter null rows.
83-
let predicate = ScalarExpression::IsNull {
84-
negated: true,
85-
expr: Box::new(args[0].clone()),
86-
};
87-
let filter = Operator::Filter(FilterOperator {
88-
predicate,
89-
is_optimized: false,
90-
having: false,
91-
});
92-
93-
if !wrap_child_with(plan, 0, filter) {
94-
return Ok(false);
95-
}
96-
9779
// Do not remove the Agg: when the table is empty, MIN/MAX should still return a NULL row.
9880
Ok(wrap_child_with(
9981
plan,
@@ -167,28 +149,13 @@ mod tests {
167149
assert!(topk.offset.is_none());
168150
assert_eq!(topk.sort_fields.len(), 1);
169151
assert!(topk.sort_fields[0].asc);
170-
assert!(topk.sort_fields[0].nulls_first);
152+
assert!(!topk.sort_fields[0].nulls_first);
171153
let args = match &op.agg_calls[0] {
172154
crate::expression::ScalarExpression::AggCall { args, .. } => args,
173155
_ => unreachable!("Aggregate should use AggCall"),
174156
};
175157
assert_eq!(topk.sort_fields[0].expr, args[0]);
176158

177-
let filter_plan = match topk_plan.childrens.as_ref() {
178-
Childrens::Only(child) => child.as_ref(),
179-
_ => unreachable!("TopK should have one child"),
180-
};
181-
match &filter_plan.operator {
182-
Operator::Filter(filter_op) => match &filter_op.predicate {
183-
crate::expression::ScalarExpression::IsNull { negated, expr } => {
184-
assert!(*negated);
185-
assert_eq!(**expr, args[0]);
186-
}
187-
_ => unreachable!("Expected IS NOT NULL filter under TopK"),
188-
},
189-
_ => unreachable!("Expected Filter under TopK"),
190-
}
191-
192159
Ok(())
193160
}
194161

@@ -216,21 +183,7 @@ mod tests {
216183
assert!(topk.offset.is_none());
217184
assert_eq!(topk.sort_fields.len(), 1);
218185
assert!(!topk.sort_fields[0].asc);
219-
assert!(topk.sort_fields[0].nulls_first);
220-
221-
let filter_plan = match topk_plan.childrens.as_ref() {
222-
Childrens::Only(child) => child.as_ref(),
223-
_ => unreachable!("TopK should have one child"),
224-
};
225-
match &filter_plan.operator {
226-
Operator::Filter(filter_op) => match &filter_op.predicate {
227-
crate::expression::ScalarExpression::IsNull { negated, .. } => {
228-
assert!(*negated);
229-
}
230-
_ => unreachable!("Expected IS NOT NULL filter under TopK"),
231-
},
232-
_ => unreachable!("Expected Filter under TopK"),
233-
}
186+
assert!(!topk.sort_fields[0].nulls_first);
234187

235188
Ok(())
236189
}

src/planner/operator/table_scan.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ impl TableScanOperator {
7070
sort_fields.push(SortField {
7171
expr: ScalarExpression::column_expr(column.clone()),
7272
asc: true,
73-
nulls_first: true,
73+
nulls_first: false,
7474
})
7575
}
7676

src/storage/table_codec.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ use std::sync::LazyLock;
3030

3131
pub(crate) const BOUND_MIN_TAG: u8 = u8::MIN;
3232
pub(crate) const BOUND_MAX_TAG: u8 = u8::MAX;
33-
pub(crate) const NULL_TAG: u8 = 0u8;
34-
pub(crate) const NOTNULL_TAG: u8 = 1u8;
33+
// NULLS LAST by default: NULL_TAG is greater than NOTNULL_TAG, so NULLs sort after non-NULL values.
34+
pub(crate) const NULL_TAG: u8 = 1u8;
35+
pub(crate) const NOTNULL_TAG: u8 = 0u8;
3536
const TABLE_NAME_HASH_LEN: usize = 8;
3637
const KEY_TYPE_TAG_LEN: usize = 1;
3738
const KEY_BOUND_LEN: usize = 1;

src/types/value.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -756,9 +756,9 @@ impl DataValue {
756756
nulls_first: bool,
757757
) -> Result<(), DatabaseError> {
758758
let (null_tag, not_null_tag) = if nulls_first {
759-
(NULL_TAG, NOTNULL_TAG)
760-
} else {
761759
(NOTNULL_TAG, NULL_TAG)
760+
} else {
761+
(NULL_TAG, NOTNULL_TAG)
762762
};
763763
if let DataValue::Null = self {
764764
b.push(null_tag);
@@ -822,7 +822,7 @@ impl DataValue {
822822

823823
#[inline]
824824
pub fn memcomparable_encode(&self, b: &mut BumpBytes) -> Result<(), DatabaseError> {
825-
self.memcomparable_encode_with_null_order(b, true)
825+
self.memcomparable_encode_with_null_order(b, false)
826826
}
827827

828828
pub fn memcomparable_decode<R: Read>(
@@ -839,7 +839,7 @@ impl DataValue {
839839
// for index cover mapping reduce one layer of conversion
840840
tuple_mapping: Option<TupleMappingRef<'_>>,
841841
) -> Result<DataValue, DatabaseError> {
842-
if reader.read_u8()? == 0u8 {
842+
if reader.read_u8()? == NULL_TAG {
843843
return Ok(DataValue::Null);
844844
}
845845
match ty {
@@ -2273,9 +2273,9 @@ mod test {
22732273
println!("{key_i8_0:?} < {key_i8_1:?}");
22742274
println!("{key_i8_1:?} < {key_i8_2:?}");
22752275
println!("{key_i8_2:?} < {key_i8_3:?}");
2276-
assert!(key_i8_0 < key_i8_1);
22772276
assert!(key_i8_1 < key_i8_2);
22782277
assert!(key_i8_2 < key_i8_3);
2278+
assert!(key_i8_3 < key_i8_0);
22792279

22802280
assert_eq!(
22812281
value_0,
@@ -2329,9 +2329,9 @@ mod test {
23292329
v_i8_2.memcomparable_encode(&mut key_i8_2)?;
23302330
v_i8_3.memcomparable_encode(&mut key_i8_3)?;
23312331

2332-
assert!(key_i8_0 < key_i8_1);
23332332
assert!(key_i8_1 < key_i8_2);
23342333
assert!(key_i8_2 < key_i8_3);
2334+
assert!(key_i8_3 < key_i8_0);
23352335

23362336
assert_eq!(
23372337
v_i8_0,
@@ -2378,9 +2378,9 @@ mod test {
23782378
v_i16_2.memcomparable_encode(&mut key_i16_2)?;
23792379
v_i16_3.memcomparable_encode(&mut key_i16_3)?;
23802380

2381-
assert!(key_i16_0 < key_i16_1);
23822381
assert!(key_i16_1 < key_i16_2);
23832382
assert!(key_i16_2 < key_i16_3);
2383+
assert!(key_i16_3 < key_i16_0);
23842384

23852385
assert_eq!(
23862386
v_i16_0,
@@ -2427,9 +2427,9 @@ mod test {
24272427
v_i32_2.memcomparable_encode(&mut key_i32_2)?;
24282428
v_i32_3.memcomparable_encode(&mut key_i32_3)?;
24292429

2430-
assert!(key_i32_0 < key_i32_1);
24312430
assert!(key_i32_1 < key_i32_2);
24322431
assert!(key_i32_2 < key_i32_3);
2432+
assert!(key_i32_3 < key_i32_0);
24332433

24342434
assert_eq!(
24352435
v_i32_0,
@@ -2476,9 +2476,9 @@ mod test {
24762476
v_i64_2.memcomparable_encode(&mut key_i64_2)?;
24772477
v_i64_3.memcomparable_encode(&mut key_i64_3)?;
24782478

2479-
assert!(key_i64_0 < key_i64_1);
24802479
assert!(key_i64_1 < key_i64_2);
24812480
assert!(key_i64_2 < key_i64_3);
2481+
assert!(key_i64_3 < key_i64_0);
24822482

24832483
assert_eq!(
24842484
v_i64_0,
@@ -2532,9 +2532,9 @@ mod test {
25322532
v_f32_2.memcomparable_encode(&mut key_f32_2)?;
25332533
v_f32_3.memcomparable_encode(&mut key_f32_3)?;
25342534

2535-
assert!(key_f32_0 < key_f32_1);
25362535
assert!(key_f32_1 < key_f32_2);
25372536
assert!(key_f32_2 < key_f32_3);
2537+
assert!(key_f32_3 < key_f32_0);
25382538

25392539
assert_eq!(
25402540
v_f32_0,
@@ -2569,9 +2569,9 @@ mod test {
25692569
v_f64_2.memcomparable_encode(&mut key_f64_2)?;
25702570
v_f64_3.memcomparable_encode(&mut key_f64_3)?;
25712571

2572-
assert!(key_f64_0 < key_f64_1);
25732572
assert!(key_f64_1 < key_f64_2);
25742573
assert!(key_f64_2 < key_f64_3);
2574+
assert!(key_f64_3 < key_f64_0);
25752575

25762576
assert_eq!(
25772577
v_f64_0,
@@ -2628,9 +2628,9 @@ mod test {
26282628
println!("{key_decimal_1:?} < {key_decimal_2:?}");
26292629
println!("{key_decimal_2:?} < {key_decimal_3:?}");
26302630

2631-
assert!(key_decimal_0 < key_decimal_1);
26322631
assert!(key_decimal_1 < key_decimal_2);
26332632
assert!(key_decimal_2 < key_decimal_3);
2633+
assert!(key_decimal_3 < key_decimal_0);
26342634

26352635
assert_eq!(
26362636
v_decimal_0,
@@ -2694,8 +2694,8 @@ mod test {
26942694
println!("{key_tuple_1:?} < {key_tuple_2:?}");
26952695
println!("{key_tuple_2:?} < {key_tuple_3:?}");
26962696

2697-
assert!(key_tuple_1 < key_tuple_2);
26982697
assert!(key_tuple_2 < key_tuple_3);
2698+
assert!(key_tuple_3 < key_tuple_1);
26992699

27002700
assert_eq!(
27012701
v_tuple_1,
@@ -2766,8 +2766,8 @@ mod test {
27662766
v_tuple_2.memcomparable_encode(&mut key_tuple_2)?;
27672767
v_tuple_3.memcomparable_encode(&mut key_tuple_3)?;
27682768

2769-
assert!(key_tuple_1 < key_tuple_2);
27702769
assert!(key_tuple_2 < key_tuple_3);
2770+
assert!(key_tuple_3 < key_tuple_1);
27712771

27722772
let ty = LogicalType::Tuple(vec![
27732773
LogicalType::Tinyint,
@@ -2865,10 +2865,10 @@ mod test {
28652865
v_zh.memcomparable_encode(&mut key_zh)?;
28662866

28672867
// ordering
2868-
assert!(key_null < key_a);
28692868
assert!(key_a < key_ab);
28702869
assert!(key_ab < key_b);
28712870
assert!(key_b < key_zh);
2871+
assert!(key_zh < key_null);
28722872

28732873
// decode check
28742874
assert_eq!(

0 commit comments

Comments
 (0)