Skip to content

Commit d0a74d5

Browse files
mir: Use filters with functions as join conditions
Modify `filters_to_join_keys` to pull filters with function calls into join conditions. This injects a projection node between the join and its parents that evaluates the function. Also, handle the edge case where a filter that sits right after a cross join couldn't be pulled. Cross joins add bogokeys to themselves because all joins need conditions; however, the filter doesn't expect that key and therefore receives one more column than it expects. One solution is to move the bogokey creation from the lowering phase to the mir rewrite phase, since we have a mir rewrite rule that ensures all columns are pulled. We modify the existing `add_bogokey` rewrite rule to add bogokeys to cross joins. We also change the rule's order so that it executes after pulling filters and before pulling columns (the rule that makes sure that nodes account for their parents' cols). Fixes: REA-4801 Closes: #1372 Release-Note-Core: Readyset now pulls filter conditions with function calls into join keys. Change-Id: Ib58e421e2138ef557fa45b8ccbe46d545378725c Reviewed-on: https://gerrit.readyset.name/c/readyset/+/9084 Tested-by: Buildkite CI Reviewed-by: Jason Brown <[email protected]> Reviewed-by: Vassili Zarouba <[email protected]>
1 parent 6c9c981 commit d0a74d5

File tree

6 files changed

+655
-69
lines changed

6 files changed

+655
-69
lines changed

logictests/join_predicates_in_where_clause.test

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,129 @@ select a.id, c.other from a join (select * from b) as c where a.b_id = c.id;
4545
30
4646
4
4747
50
48+
49+
statement ok
50+
create table c(id int, name text);
51+
52+
statement ok
53+
create table d(id int, name text);
54+
55+
statement ok
56+
insert into c values (1, 'hello'), (2, 'WORLD'), (3, 'HELLO');
57+
58+
statement ok
59+
insert into d values (1, 'hello'), (4, 'not world');
60+
61+
query IT rowsort
62+
select c.id, d.name from c join d where c.name = d.name;
63+
----
64+
1
65+
hello
66+
67+
query IT rowsort
68+
select c.id, d.name from c join d where lower(c.name) = d.name;
69+
----
70+
1
71+
hello
72+
3
73+
hello
74+
75+
query IT rowsort
76+
select c.id, d.name from c join d where c.name = upper(d.name);
77+
----
78+
3
79+
hello
80+
81+
query TI rowsort
82+
select c.name, d.id from c join d where lower(c.name) = substr(d.name, 5, 5);
83+
----
84+
WORLD
85+
4
86+
87+
# Pushed function should not appear when doing select *
88+
query ITIT rowsort
89+
select * from c join d where lower(c.name) = substr(d.name, 5, 5);
90+
----
91+
2
92+
WORLD
93+
4
94+
not world
95+
96+
97+
query IT rowsort
98+
select c.id, d.name from c join d where lower(d.name) = lower(c.name);
99+
----
100+
1
101+
hello
102+
3
103+
hello
104+
105+
query IT rowsort
106+
select c.id, d.name from c join d where upper(d.name) = c.name;
107+
----
108+
3
109+
hello
110+
111+
# Test nested functions
112+
query IT rowsort
113+
select c.id, d.name from c join d where ascii(substr(lower(c.name), 1, 5)) = ascii(lower(upper(d.name)));
114+
----
115+
1
116+
hello
117+
3
118+
hello
119+
120+
# Test with AND'ed conditions
121+
query IT rowsort
122+
select c.id, d.name from c join d where
123+
lower(c.name) = lower(substr(d.name, 1, 5))
124+
and c.id = d.id
125+
----
126+
1
127+
hello
128+
129+
query IT rowsort
130+
select c.id, d.name from c join d
131+
where lower(c.name) = substr(d.name, 1, 5)
132+
and ascii(substr(lower(c.name), 1, 1)) = ascii(substr(lower(d.name), 1, 1));
133+
----
134+
1
135+
hello
136+
3
137+
hello
138+
139+
# A filter that can't be pushed
140+
query IT rowsort
141+
select c.id, d.name from c join d
142+
where round(c.id) = d.id;
143+
----
144+
1
145+
hello
146+
147+
# A filter that can be pushed, and another that can't
148+
query IT rowsort
149+
select c.id, d.name from c join d
150+
where lower(c.name) = lower(substr(d.name, 1, 5))
151+
and round(c.id) = d.id;
152+
----
153+
1
154+
hello
155+
156+
# Test where filter is already a condition, but contains function calls
157+
query IT rowsort
158+
select c.id, d.name from c join d on lower(c.name) = lower(d.name);
159+
----
160+
1
161+
hello
162+
3
163+
hello
164+
165+
# Filter uses one side of the join
166+
query IT rowsort
167+
select c.id, d.name from c join d
168+
where lower(c.name) = substr(c.name, 1, 5);
169+
----
170+
1
171+
hello
172+
1
173+
not world

readyset-mir/src/rewrite/add_bogokey.rs

Lines changed: 171 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,7 @@ use crate::node::{MirNode, MirNodeInner};
1010
use crate::query::MirQuery;
1111
use crate::Column;
1212

13-
/// If the given query has a Leaf but doesn't have any keys, create a key for it by adding a new
14-
/// node to the query that projects out a constant literal value (a "bogokey", from "bogus key") and
15-
/// making that the key for the query.
16-
///
17-
/// This pass will also handle ensuring that any topk or paginate nodes in leaf position in such
18-
/// queries have `group_by` columns, by lifting the bogokey project node over those nodes and adding
19-
/// the bogokey to their `group_by`
20-
pub(crate) fn add_bogokey_if_necessary(query: &mut MirQuery<'_>) -> ReadySetResult<()> {
13+
fn add_bogokey_leaf(query: &mut MirQuery<'_>) -> ReadySetResult<()> {
2114
match &query.leaf_node().inner {
2215
MirNodeInner::Leaf { keys, .. } if keys.is_empty() => {}
2316
_ => {
@@ -87,7 +80,83 @@ pub(crate) fn add_bogokey_if_necessary(query: &mut MirQuery<'_>) -> ReadySetResu
8780
&mut query.get_node_mut(node_to_insert_above).unwrap().inner
8881
{
8982
group_by.push(Column::named("bogokey"))
90-
}
83+
};
84+
85+
Ok(())
86+
}
87+
88+
/// Gives every cross join in `query` a join condition built on a constant "bogokey" column.
///
/// A cross join is recognised here as a `Join` node whose `on` list is empty. For each one, a
/// `Project` node is added via `insert_below` for every node returned by `query.ancestors(..)`;
/// that projection emits all of the node's columns plus the literal `0` aliased as `bogokey`.
/// The join itself then gets `(bogokey, bogokey)` pushed onto `on` and `bogokey` appended to
/// `project`.
///
/// Errors from `insert_below` are propagated to the caller.
fn add_bogokey_join(query: &mut MirQuery<'_>) -> ReadySetResult<()> {
89+
// Collect the indices of all matching joins up front; the loop below mutates the graph.
let cross_joins = query
90+
.node_references()
91+
.filter(|(_, node)| {
92+
matches!(
93+
node,
94+
MirNode {
95+
inner: MirNodeInner::Join { on, .. },
96+
..
97+
} if on.is_empty()
98+
)
99+
})
100+
.map(|(idx, _)| idx)
101+
.collect::<Vec<_>>();
102+
103+
cross_joins
104+
.iter()
105+
.try_for_each(|idx| -> ReadySetResult<()> {
106+
trace!(?idx, "Adding bogokey to cross join");
107+
108+
// NOTE(review): this `unwrap` panics instead of returning an error if `ancestors` fails
// for `idx` — presumably impossible since `idx` was just read from the query; confirm.
let ancestors = query.ancestors(*idx).unwrap();
109+
// NOTE(review): `i` restarts at 0 for every cross join, so a query with more than one
// cross join produces repeated `<query>_bogo_project_<i>` names — confirm that node
// names do not need to be unique.
for (i, ancestor) in ancestors.into_iter().enumerate() {
110+
query.insert_below(
111+
ancestor,
112+
MirNode::new(
113+
format!("{}_bogo_project_{}", query.name().display_unquoted(), i).into(),
114+
MirNodeInner::Project {
115+
// Pass every existing column through unchanged, then append the constant key.
emit: query
116+
.graph
117+
.columns(ancestor)
118+
.into_iter()
119+
.map(ProjectExpr::Column)
120+
.chain(iter::once(ProjectExpr::Expr {
121+
expr: Expr::Literal(0.into()),
122+
alias: "bogokey".into(),
123+
}))
124+
.collect(),
125+
},
126+
),
127+
)?;
128+
}
129+
130+
// Join on the bogokey from both sides, and list it in the join's own `project` as well.
match &mut query.get_node_mut(*idx).unwrap().inner {
131+
MirNodeInner::Join { on, project } => {
132+
on.push((Column::named("bogokey"), Column::named("bogokey")));
133+
project.push(Column::named("bogokey"));
134+
}
135+
// `idx` was selected above precisely because it matched `MirNodeInner::Join`.
_ => unreachable!(),
136+
}
137+
138+
Ok(())
139+
})?;
140+
141+
Ok(())
142+
}
143+
144+
/// Adds a bogokey (from "bogus key") to the query in the two scenarios where one is needed:
145+
///
146+
/// If the given query has a Leaf but doesn't have any keys, create a key for it by adding a new
147+
/// node to the query that projects out a constant literal value (a "bogokey", from "bogus key") and
148+
/// making that the key for the query.
149+
///
150+
/// This pass will also handle ensuring that any topk or paginate nodes in leaf position in such
151+
/// queries have `group_by` columns, by lifting the bogokey project node over those nodes and adding
152+
/// the bogokey to their `group_by`
153+
///
154+
/// If the query contains a join node without a join condition (i.e. a cross join): all join nodes
155+
/// require a join condition, so we add a bogokey projection to both of the join's parents and
156+
/// use that as the join condition.
157+
pub(crate) fn add_bogokey_if_necessary(query: &mut MirQuery<'_>) -> ReadySetResult<()> {
158+
// The leaf case runs first, then the cross-join case; an error from either is returned as-is.
add_bogokey_leaf(query)?;
159+
add_bogokey_join(query)?;
91160

92161
Ok(())
93162
}
@@ -313,4 +382,97 @@ mod tests {
313382
_ => panic!(),
314383
}
315384
}
385+
386+
// Builds two single-column base tables (`a` and `b`) feeding a `Join` whose `on` list is empty,
// runs `add_bogokey_if_necessary`, and checks that (1) the join now has exactly one condition,
// `bogokey = bogokey`, and (2) the join's incoming neighbours include projections named
// `<query>_bogo_project_0` and `<query>_bogo_project_1` that emit a `bogokey` expression.
#[test]
387+
fn test_add_bogokey_to_cross_join_node() {
388+
let query_name = Relation::from("query_needing_bogokey");
389+
let mut mir_graph = MirGraph::new();
390+
391+
// Left input: base table with a single int column `a`, which is also its primary key.
let left = mir_graph.add_node(MirNode::new(
392+
"left_base".into(),
393+
MirNodeInner::Base {
394+
column_specs: vec![ColumnSpecification {
395+
column: ast::Column::from("a"),
396+
sql_type: SqlType::Int(None),
397+
generated: None,
398+
constraints: vec![],
399+
comment: None,
400+
}],
401+
primary_key: Some([Column::from("a")].into()),
402+
unique_keys: Default::default(),
403+
},
404+
));
405+
mir_graph[left].add_owner(query_name.clone());
406+
407+
// Right input: base table with a single int column `b`, which is also its primary key.
let right = mir_graph.add_node(MirNode::new(
408+
"right_base".into(),
409+
MirNodeInner::Base {
410+
column_specs: vec![ColumnSpecification {
411+
column: ast::Column::from("b"),
412+
sql_type: SqlType::Int(None),
413+
generated: None,
414+
constraints: vec![],
415+
comment: None,
416+
}],
417+
primary_key: Some([Column::from("b")].into()),
418+
unique_keys: Default::default(),
419+
},
420+
));
421+
mir_graph[right].add_owner(query_name.clone());
422+
423+
let join_node = mir_graph.add_node(MirNode::new(
424+
"join_node".into(),
425+
MirNodeInner::Join {
426+
on: vec![], // Empty join condition (i.e. cross join)
427+
project: vec![Column::named("a"), Column::named("b")],
428+
},
429+
));
430+
mir_graph[join_node].add_owner(query_name.clone());
431+
// Wire both bases into the join; the third argument is the edge weight (0 for left, 1 for
// right).
mir_graph.add_edge(left, join_node, 0);
432+
mir_graph.add_edge(right, join_node, 1);
433+
434+
// The join itself is passed as the query's node here; there is no `Leaf` node in this graph.
let mut query = MirQuery::new(query_name.clone(), join_node, &mut mir_graph);
435+
436+
add_bogokey_if_necessary(&mut query).unwrap();
437+
438+
// The join must now carry exactly one condition, with `bogokey` on both sides.
match &query.get_node(join_node).unwrap().inner {
439+
MirNodeInner::Join { on, .. } => {
440+
assert_eq!(on.len(), 1, "Expected a bogo key to be added");
441+
assert_eq!(
442+
on[0].0.name, "bogokey",
443+
"Bogo key column name should be 'bogokey'"
444+
);
445+
assert_eq!(
446+
on[0].1.name, "bogokey",
447+
"Bogo key column name should be 'bogokey'"
448+
);
449+
}
450+
_ => panic!("Leaf node is not a Join node"),
451+
}
452+
453+
// Helper closure: finds the join's incoming neighbour named `parent_name` and asserts that it
// is a `Project` node emitting an expression aliased "bogokey".
let check_projection_node = |parent_name: &str| {
454+
let parent = mir_graph
455+
.neighbors_directed(join_node, petgraph::Direction::Incoming)
456+
// NOTE(review): debug output only; has no effect on the assertions.
.inspect(|p| println!("Parent: {:?}", mir_graph[*p]))
457+
.find(|&n| mir_graph[n].name() == &Relation::from(parent_name))
458+
.unwrap_or_else(|| panic!("Expected a projection node for {}", parent_name));
459+
460+
match &mir_graph[parent].inner {
461+
MirNodeInner::Project { emit, .. } => {
462+
assert!(
463+
emit.iter().any(
464+
|c| matches!(c, ProjectExpr::Expr{ alias, ..} if alias == "bogokey")
465+
),
466+
"{} projection should output bogokey",
467+
parent_name
468+
);
469+
}
470+
_ => panic!("{} parent is not a Projection node", parent_name),
471+
}
472+
};
473+
474+
// One projection per join input, named with the index used by `add_bogokey_join`.
check_projection_node(format!("{}_bogo_project_0", query_name.display_unquoted()).as_str());
475+
check_projection_node(format!("{}_bogo_project_1", query_name.display_unquoted()).as_str());
476+
}
316478
}

0 commit comments

Comments
 (0)