Skip to content

Commit 2a5413c

Browse files
committed
opt: add rule to pull filters out of EXISTS condition
This commit adds a new norm rule `HoistUnboundFilterFromExistsSubquery` that pulls a filter condition out of an EXISTS subquery if that condition only references columns from the outer query. This gives other optimizations a chance to apply to the hoisted filter before filter push-down rules move the EXISTS subquery, and everything in it, below joins and other operators.

Fixes #146000

Release note: None
1 parent b99dcdb commit 2a5413c

File tree

10 files changed

+618
-218
lines changed

10 files changed

+618
-218
lines changed

pkg/sql/opt/memo/testdata/logprops/join

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -996,39 +996,38 @@ project
996996
opt
997997
SELECT * FROM (SELECT count(*) cnt FROM xysd) WHERE EXISTS(SELECT * FROM uv WHERE cnt=1)
998998
----
999-
project
999+
select
10001000
├── columns: cnt:7(int!null)
10011001
├── cardinality: [0 - 1]
10021002
├── key: ()
10031003
├── fd: ()-->(7)
1004-
└── inner-join (cross)
1005-
├── columns: count_rows:7(int!null)
1006-
├── cardinality: [0 - 1]
1007-
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one)
1008-
├── key: ()
1009-
├── fd: ()-->(7)
1010-
├── select
1011-
│ ├── columns: count_rows:7(int!null)
1012-
│ ├── cardinality: [0 - 1]
1013-
│ ├── key: ()
1014-
│ ├── fd: ()-->(7)
1015-
│ ├── scalar-group-by
1016-
│ │ ├── columns: count_rows:7(int!null)
1017-
│ │ ├── cardinality: [1 - 1]
1018-
│ │ ├── key: ()
1019-
│ │ ├── fd: ()-->(7)
1020-
│ │ ├── prune: (7)
1021-
│ │ ├── scan xysd@xysd_s_d_key
1022-
│ │ └── aggregations
1023-
│ │ └── count-rows [as=count_rows:7, type=int]
1024-
│ └── filters
1025-
│ └── eq [type=bool, outer=(7), constraints=(/7: [/1 - /1]; tight), fd=()-->(7)]
1026-
│ ├── variable: count_rows:7 [type=int]
1027-
│ └── const: 1 [type=int]
1028-
├── scan uv
1029-
│ ├── limit: 1
1030-
│ └── key: ()
1031-
└── filters (true)
1004+
├── scalar-group-by
1005+
│ ├── columns: count_rows:7(int!null)
1006+
│ ├── cardinality: [1 - 1]
1007+
│ ├── key: ()
1008+
│ ├── fd: ()-->(7)
1009+
│ ├── prune: (7)
1010+
│ ├── scan xysd@xysd_s_d_key
1011+
│ └── aggregations
1012+
│ └── count-rows [as=count_rows:7, type=int]
1013+
└── filters
1014+
├── coalesce [type=bool, subquery]
1015+
│ ├── subquery [type=bool]
1016+
│ │ └── project
1017+
│ │ ├── columns: column14:14(bool!null)
1018+
│ │ ├── cardinality: [0 - 1]
1019+
│ │ ├── key: ()
1020+
│ │ ├── fd: ()-->(14)
1021+
│ │ ├── prune: (14)
1022+
│ │ ├── scan uv
1023+
│ │ │ ├── limit: 1
1024+
│ │ │ └── key: ()
1025+
│ │ └── projections
1026+
│ │ └── true [as=column14:14, type=bool]
1027+
│ └── false [type=bool]
1028+
└── eq [type=bool, outer=(7), constraints=(/7: [/1 - /1]; tight), fd=()-->(7)]
1029+
├── variable: count_rows:7 [type=int]
1030+
└── const: 1 [type=int]
10321031

10331032
# Maximum cardinality of the right input is propagated to the SemiJoin when
10341033
# right rows are guaranteed at most one match each over the join filters.

pkg/sql/opt/memo/testdata/logprops/limit

Lines changed: 76 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -247,83 +247,83 @@ WHERE t1.b IN (
247247
)
248248
ORDER BY t1.a ASC;
249249
----
250-
sort
251-
├── columns: "?column?":23(int!null) [hidden: t1.a:1(int!null)]
250+
project
251+
├── columns: "?column?":24(int!null) [hidden: t1.a:1(int!null)]
252252
├── key: (1)
253-
├── fd: ()-->(23)
254-
├── ordering: +1 opt(23) [actual: +1]
255-
├── prune: (1,23)
256-
├── interesting orderings: (+1 opt(23))
257-
└── project
258-
├── columns: "?column?":23(int!null) t1.a:1(int!null)
259-
├── key: (1)
260-
├── fd: ()-->(23)
261-
├── prune: (1,23)
262-
├── interesting orderings: (+1 opt(23))
263-
├── semi-join (cross)
264-
│ ├── columns: t1.a:1(int!null) t1.b:2(int!null)
265-
│ ├── key: (1)
266-
│ ├── fd: ()-->(2)
267-
│ ├── prune: (1)
268-
│ ├── interesting orderings: (+1 opt(2))
269-
│ ├── select
270-
│ │ ├── columns: t1.a:1(int!null) t1.b:2(int!null)
271-
│ ├── key: (1)
272-
├── fd: ()-->(2)
273-
│ ├── prune: (1)
274-
├── interesting orderings: (+1 opt(2))
275-
│ │ ├── scan t65038 [as=t1]
276-
│ │ ├── columns: t1.a:1(int!null) t1.b:2(int)
277-
│ │ ├── key: (1)
278-
│ │ ├── fd: (1)-->(2)
279-
│ │ ├── prune: (1,2)
280-
│ │ └── interesting orderings: (+1)
281-
│ │ └── filters
282-
└── eq [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)]
283-
├── variable: t1.b:2 [type=int]
284-
── const: 1 [type=int]
285-
│ ├── top-k
286-
│ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
287-
│ │ ├── internal-ordering: +(12|17)
288-
│ │ ├── k: 1
289-
│ │ ├── cardinality: [0 - 1]
290-
│ │ ├── key: ()
291-
│ │ ├── fd: ()-->(11-13,16-18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
292-
│ │ ├── interesting orderings: (+(12|17))
293-
│ │ └── inner-join (cross)
294-
│ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
295-
│ │ ├── fd: (11)-->(12,13), (16)-->(17,18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
296-
│ │ ├── interesting orderings: (+11) (+16)
297-
│ │ ├── scan t65038
298-
│ │ │ └── unfiltered-cols: (6-10)
299-
│ │ ├── inner-join (merge)
300-
│ │ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
301-
│ │ │ ├── left ordering: +11,+12,+13
302-
│ │ │ ├── right ordering: +16,+17,+18
303-
│ │ │ ├── key: (16)
304-
│ │ │ ├── fd: (11)-->(12,13), (16)-->(17,18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
305-
│ │ │ ├── interesting orderings: (+11) (+16)
306-
│ │ │ ├── scan t65038 [as=t2]
307-
│ │ │ │ ├── columns: t2.a:11(int!null) t2.b:12(int) t2.c:13(int)
308-
│ │ │ │ ├── key: (11)
309-
│ │ │ │ ├── fd: (11)-->(12,13)
310-
│ │ │ │ ├── ordering: +11
311-
│ │ │ │ ├── prune: (11-13)
312-
│ │ │ │ ├── interesting orderings: (+11)
313-
│ │ │ │ └── unfiltered-cols: (11-15)
314-
│ │ │ ├── scan t65038 [as=t3]
315-
│ │ │ │ ├── columns: t3.a:16(int!null) t3.b:17(int) t3.c:18(int)
316-
│ │ │ │ ├── key: (16)
317-
│ │ │ │ ├── fd: (16)-->(17,18)
318-
│ │ │ ── ordering: +16
319-
── prune: (16-18)
320-
── interesting orderings: (+16)
321-
└── unfiltered-cols: (16-20)
322-
│ └── filters (true)
323-
── filters (true)
324-
└── filters (true)
325-
└── projections
326-
└── const: 1 [as="?column?":23, type=int]
253+
├── fd: ()-->(24)
254+
├── ordering: +1 opt(24) [actual: +1]
255+
├── prune: (1,24)
256+
├── interesting orderings: (+1 opt(24))
257+
├── select
258+
├── columns: t1.a:1(int!null) t1.b:2(int!null)
259+
├── key: (1)
260+
├── fd: ()-->(2)
261+
├── ordering: +1 opt(2) [actual: +1]
262+
├── prune: (1)
263+
├── interesting orderings: (+1 opt(2))
264+
│ ├── scan t65038 [as=t1]
265+
│ ├── columns: t1.a:1(int!null) t1.b:2(int)
266+
│ ├── key: (1)
267+
│ ├── fd: (1)-->(2)
268+
│ ├── ordering: +1
269+
│ ├── prune: (1,2)
270+
│ │ └── interesting orderings: (+1)
271+
── filters
272+
├── coalesce [type=bool, subquery]
273+
│ ├── subquery [type=bool]
274+
│ └── project
275+
│ │ ├── columns: column23:23(bool!null)
276+
│ │ ├── cardinality: [0 - 1]
277+
│ │ ├── key: ()
278+
│ │ ├── fd: ()-->(23)
279+
│ │ ├── prune: (23)
280+
│ │ ├── top-k
281+
│ │ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
282+
│ ├── internal-ordering: +(12|17)
283+
├── k: 1
284+
│ ├── cardinality: [0 - 1]
285+
│ │ ├── key: ()
286+
│ │ ├── fd: ()-->(11-13,16-18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
287+
│ │ ├── interesting orderings: (+(12|17))
288+
│ │ │ └── inner-join (cross)
289+
│ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
290+
│ │ ├── fd: (11)-->(12,13), (16)-->(17,18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
291+
│ │ ├── interesting orderings: (+11) (+16)
292+
│ │ ├── scan t65038
293+
│ │ │ │ └── unfiltered-cols: (6-10)
294+
│ │ │ ├── inner-join (merge)
295+
│ │ │ │ ├── columns: t2.a:11(int!null) t2.b:12(int!null) t2.c:13(int!null) t3.a:16(int!null) t3.b:17(int!null) t3.c:18(int!null)
296+
│ │ │ │ ├── left ordering: +11,+12,+13
297+
│ │ │ │ ├── right ordering: +16,+17,+18
298+
│ │ │ │ ├── key: (16)
299+
│ │ │ │ ├── fd: (11)-->(12,13), (16)-->(17,18), (11)==(16), (16)==(11), (13)==(18), (18)==(13), (12)==(17), (17)==(12)
300+
│ │ │ ├── interesting orderings: (+11) (+16)
301+
│ │ │ ├── scan t65038 [as=t2]
302+
│ │ │ │ │ ├── columns: t2.a:11(int!null) t2.b:12(int) t2.c:13(int)
303+
│ │ │ │ │ ├── key: (11)
304+
│ │ │ │ │ ├── fd: (11)-->(12,13)
305+
│ │ │ │ │ ├── ordering: +11
306+
│ │ │ │ │ ├── prune: (11-13)
307+
│ │ │ ├── interesting orderings: (+11)
308+
│ │ │ │ └── unfiltered-cols: (11-15)
309+
│ │ │ │ ├── scan t65038 [as=t3]
310+
│ │ │ ├── columns: t3.a:16(int!null) t3.b:17(int) t3.c:18(int)
311+
│ │ │ ├── key: (16)
312+
│ │ │ ├── fd: (16)-->(17,18)
313+
│ │ │ │ ├── ordering: +16
314+
│ │ │ │ │ ├── prune: (16-18)
315+
│ │ │ ├── interesting orderings: (+16)
316+
│ │ │ │ └── unfiltered-cols: (16-20)
317+
│ │ │ ── filters (true)
318+
│ │ │ ── filters (true)
319+
│ │ ── projections
320+
── true [as=column23:23, type=bool]
321+
│ │ └── false [type=bool]
322+
└── eq [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)]
323+
── variable: t1.b:2 [type=int]
324+
└── const: 1 [type=int]
325+
└── projections
326+
└── const: 1 [as="?column?":24, type=int]
327327

328328
opt
329329
SELECT * FROM xyzs ORDER BY y DESC LIMIT 10

pkg/sql/opt/memo/testdata/stats/project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ exec-ddl
302302
CREATE TABLE t124831 (a INT, b INT);
303303
----
304304

305-
norm disable=(SimplifyZeroCardinalityGroup,EliminateExistsZeroRows,SimplifyZeroCardinalitySemiJoin,PushFilterIntoJoinLeft)
305+
norm disable=(SimplifyZeroCardinalityGroup,EliminateExistsZeroRows,SimplifyZeroCardinalitySemiJoin,PushFilterIntoJoinLeft,HoistUnboundFilterFromExistsSubquery)
306306
SELECT a FROM t124831 WHERE NULL::INT IN (SELECT 1 LIMIT b);
307307
----
308308
project

pkg/sql/opt/norm/general_funcs.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,17 @@ func (c *CustomFuncs) RemoveFiltersItem(
769769
return filters.RemoveFiltersItem(search)
770770
}
771771

772+
// AppendFiltersItem returns a new list that is a copy of the given list, except
773+
// that the given item has been appended to the end of the list.
774+
func (c *CustomFuncs) AppendFiltersItem(
775+
filters memo.FiltersExpr, toAppend opt.ScalarExpr,
776+
) memo.FiltersExpr {
777+
newFilters := make(memo.FiltersExpr, len(filters)+1)
778+
copy(newFilters, filters)
779+
newFilters[len(filters)] = c.f.ConstructFiltersItem(toAppend)
780+
return newFilters
781+
}
782+
772783
// ReplaceFiltersItem returns a new list that is a copy of the given list,
773784
// except that the given search item has been replaced by the given replace
774785
// item. If the list contains the search item multiple times, then only the

pkg/sql/opt/norm/rules/decorrelate.opt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,54 @@
911911
(OutputCols2 $left $right)
912912
)
913913

914+
# HoistUnboundFilterFromExistsSubquery pulls a filter condition out of an
915+
# Exists subquery if the filter condition only depends on columns from the
916+
# outer query. This is useful because it allows other optimization rules to
917+
# apply to the filter which was previously hidden inside the subquery.
918+
[HoistUnboundFilterFromExistsSubquery, Normalize]
919+
(Select
920+
$input:*
921+
$filters:[
922+
...
923+
$item:(FiltersItem
924+
(Exists
925+
(Select
926+
$innerInput:*
927+
$innerFilters:[
928+
...
929+
$innerItem:(FiltersItem $unboundCond:*) &
930+
(IsBoundBy
931+
$innerItem
932+
$inputCols:(OutputCols $input)
933+
)
934+
...
935+
]
936+
)
937+
$existsPrivate:*
938+
)
939+
)
940+
...
941+
]
942+
)
943+
=>
944+
(Select
945+
$input
946+
(AppendFiltersItem
947+
(ReplaceFiltersItem
948+
$filters
949+
$item
950+
(Exists
951+
(Select
952+
$innerInput
953+
(RemoveFiltersItem $innerFilters $innerItem)
954+
)
955+
$existsPrivate
956+
)
957+
)
958+
$unboundCond
959+
)
960+
)
961+
914962
# HoistSelectExists extracts existential subqueries from Select filters,
915963
# turning them into semi-joins. This eliminates the subquery, which is often
916964
# expensive to execute and restricts the optimizer's plan choices.

0 commit comments

Comments
 (0)