Skip to content

Commit 68aae64

Browse files
committed
move pruning of NO_NULL_INDEX to deriveStats
Signed-off-by: Yang Keao <[email protected]>
1 parent 6033c4e commit 68aae64

File tree

4 files changed

+138
-30
lines changed

4 files changed

+138
-30
lines changed

pkg/planner/core/find_best_task.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,37 +1163,7 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
11631163
candidates = append(candidates, getIndexMergeCandidate(ds, path, prop))
11641164
continue
11651165
}
1166-
// If the index contains NO_NULL_INDEX column, it may not contain all rows
1167-
if path.Index != nil && len(path.Index.NoNullIdxColOffsets) > 0 {
1168-
includeNullRange := false
1169-
1170-
for _, col := range path.Index.NoNullIdxColOffsets {
1171-
for _, ran := range path.Ranges {
1172-
if len(ran.LowVal) <= col || len(ran.HighVal) <= col ||
1173-
ran.LowVal[col].IsNull() || ran.HighVal[col].IsNull() {
1174-
includeNullRange = true
1175-
break
1176-
}
1177-
}
1178-
1179-
if includeNullRange {
1180-
// If the condition can make sure the column is not null, we can also use this index.
1181-
// TODO: maybe only considering the conditions except accessConds is enough, because the ranges
1182-
// don't contain NULL.
1183-
if ranger.CheckColumnIsNotNullWithCNFConditions(ds.SCtx(), path.FullIdxCols[col], ds.AllConds) {
1184-
includeNullRange = false
1185-
continue
1186-
}
11871166

1188-
break
1189-
}
1190-
}
1191-
1192-
// conditions that the column is not null
1193-
if includeNullRange {
1194-
continue
1195-
}
1196-
}
11971167
// if we already know the range of the scan is empty, just return a TableDual
11981168
if len(path.Ranges) == 0 {
11991169
return []*candidatePath{{path: path}}

pkg/planner/core/stats.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func deriveStats4DataSource(lp base.LogicalPlan, colGroups [][]*expression.Colum
123123
ds.SetStats(ds.TableStats.Scale(selectivity))
124124
return ds.StatsInfo(), nil
125125
}
126+
126127
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
127128
debugtrace.EnterContextCommon(ds.SCtx())
128129
defer debugtrace.LeaveContextCommon(ds.SCtx())
@@ -156,6 +157,8 @@ func deriveStats4DataSource(lp base.LogicalPlan, colGroups [][]*expression.Colum
156157
return nil, err
157158
}
158159

160+
removeInvalidPathsForDataSource(ds)
161+
159162
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
160163
debugTraceAccessPaths(ds.SCtx(), ds.PossibleAccessPaths)
161164
}
@@ -168,6 +171,73 @@ func deriveStats4DataSource(lp base.LogicalPlan, colGroups [][]*expression.Colum
168171
return ds.StatsInfo(), nil
169172
}
170173

174+
// checkNoNullIndexForPath reports whether path is usable given the `NO_NULL_INDEX` columns of path.Index.
// It returns false as soon as one such column has a range that may include NULL and conditions cannot
// prove that column is not null; otherwise it returns true. path.Index is dereferenced without a nil
// check, so callers must pass a path whose Index is non-nil.
175+
func checkNoNullIndexForPath(ds *logicalop.DataSource, path *util.AccessPath, conditions []expression.Expression) (valid bool) {
176+
includeNullRange := false
177+
178+
for _, col := range path.Index.NoNullIdxColOffsets {
179+
for _, ran := range path.Ranges {
180+
if len(ran.LowVal) <= col || len(ran.HighVal) <= col ||
181+
ran.LowVal[col].IsNull() || ran.HighVal[col].IsNull() {
182+
includeNullRange = true
183+
break
184+
}
185+
}
186+
187+
if includeNullRange {
188+
// If the condition can make sure the column is not null, we can also use this index.
189+
// TODO: maybe only considering the conditions except accessConds is enough, because the ranges
190+
// do contain NULL.
// The flag is reset before `continue` so the next NO_NULL_INDEX column is checked from a clean state.
191+
if ranger.CheckColumnIsNotNullWithCNFConditions(ds.SCtx(), path.FullIdxCols[col], conditions) {
192+
includeNullRange = false
193+
continue
194+
}
195+
196+
break
197+
}
198+
}
199+
200+
return !includeNullRange
201+
}
202+
203+
// removeInvalidPathsForDataSource removes invalid paths from ds.PossibleAccessPaths in place.
204+
// Some paths are not available because they may use an index which contains a NO_NULL_INDEX column,
205+
// but the conditions cannot make sure the column is not null.
// Table paths are always kept. The slice is walked from the last element to the first so that
// slices.Delete at index i does not shift any element that has not been visited yet.
206+
func removeInvalidPathsForDataSource(ds *logicalop.DataSource) {
207+
for i := len(ds.PossibleAccessPaths) - 1; i >= 0; i-- {
208+
path := ds.PossibleAccessPaths[i]
209+
210+
if path.IsTablePath() {
211+
continue
212+
}
213+
214+
// If the index contains a NO_NULL_INDEX column, it may not contain all rows, so the path is
// dropped unless checkNoNullIndexForPath accepts it against all conditions of the data source.
215+
if path.Index != nil && len(path.Index.NoNullIdxColOffsets) > 0 {
216+
if !checkNoNullIndexForPath(ds, path, ds.AllConds) {
217+
ds.PossibleAccessPaths = slices.Delete(ds.PossibleAccessPaths, i, i+1)
218+
continue
219+
}
220+
}
221+
222+
if path.PartialIndexPaths != nil {
223+
// Consider whether this path is valid for the `NO_NULL_INDEX` column: every partial path that has
// such columns is checked against its own IndexFilters plus this path's TableFilters, and a single
// failing partial path removes the whole path.
// NOTE(review): the append below may write into the backing array of partialPath.IndexFilters when
// it has spare capacity — confirm this is safe, or build the combined conditions in a fresh slice.
224+
isValid := true
225+
for _, partialPath := range path.PartialIndexPaths {
226+
if partialPath.Index != nil && len(partialPath.Index.NoNullIdxColOffsets) > 0 {
227+
if !checkNoNullIndexForPath(ds, partialPath, append(partialPath.IndexFilters, path.TableFilters...)) {
228+
isValid = false
229+
break
230+
}
231+
}
232+
}
233+
if !isValid {
234+
ds.PossibleAccessPaths = slices.Delete(ds.PossibleAccessPaths, i, i+1)
235+
continue
236+
}
237+
}
238+
}
239+
}
240+
171241
func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression) error {
172242
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
173243
debugtrace.EnterContextCommon(ds.SCtx())

tests/integrationtest/r/planner/core/integration.result

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4391,3 +4391,42 @@ IndexLookUp 9980.01 root
43914391
select * from t use index (idx3) where col1 is not null and col3 is not null;
43924392
id col1 col2 col3
43934393
3 1 NULL 1
4394+
drop table if exists t;
4395+
create table t (id int primary key, col1 int no_null_index, col2 int, col3 int);
4396+
alter table t add index idx2(col1, col2);
4397+
alter table t add index idx3(col1, col3);
4398+
insert into t values (2, null, 1, 1);
4399+
select * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
4400+
id col1 col2 col3
4401+
2 NULL 1 1
4402+
select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
4403+
Error 1815 (HY000): Internal : Can't find a proper physical plan for this query
4404+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
4405+
Error 1815 (HY000): Internal : Can't find a proper physical plan for this query
4406+
drop table if exists t;
4407+
create table t (id int primary key, col1 int no_null_index, col2 int no_null_index);
4408+
alter table t add index idx1(col1);
4409+
alter table t add index idx2(col2);
4410+
insert into t values (1, 1, NULL);
4411+
insert into t values (2, NULL, 1);
4412+
select /*+ USE_INDEX_MERGE(t, idx1, idx2) */ * FROM t WHERE col1 > 5 or col2 > 5;
4413+
id col1 col2
4414+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx1, idx2) */ * FROM t WHERE col1 > 5 or col2 > 5;
4415+
id estRows task access object operator info
4416+
IndexMerge 5555.56 root type: union
4417+
├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t, index:idx1(col1) range:(5,+inf], keep order:false, stats:pseudo
4418+
├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t, index:idx2(col2) range:(5,+inf], keep order:false, stats:pseudo
4419+
└─TableRowIDScan(Probe) 5555.56 cop[tikv] table:t keep order:false, stats:pseudo
4420+
drop table if exists t;
4421+
create table t (id int primary key, col1 int no_null_index, col2 json, col3 json);
4422+
alter table t add index idx2(col1, (CAST(col2->'$.path' AS SIGNED ARRAY)) );
4423+
alter table t add index idx3(col1, (CAST(col3->'$.path' AS SIGNED ARRAY)) );
4424+
insert into t values (2, null, '{"path":[1]}', '{"path":[1]}');
4425+
select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and json_contains(col2->'$.path', '1') and json_contains(col3->'$.path', '1');
4426+
id col1 col2 col3
4427+
2 NULL {"path": [1]} {"path": [1]}
4428+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and json_contains(col2->'$.path', '1') and json_contains(col3->'$.path', '1');
4429+
id estRows task access object operator info
4430+
TableReader 8.00 root data:Selection
4431+
└─Selection 8.00 cop[tikv] isnull(planner__core__integration.t.col1), json_contains(json_extract(planner__core__integration.t.col2, "$.path"), cast("1", json BINARY)), json_contains(json_extract(planner__core__integration.t.col3, "$.path"), cast("1", json BINARY))
4432+
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo

tests/integrationtest/t/planner/core/integration.test

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,3 +2407,32 @@ explain format='brief' select * from t use index (idx1) where col1 > 0 and col3
24072407
select * from t use index (idx2) where col1 is not null and col3 is not null;
24082408
explain format='brief' select * from t use index (idx2) where col1 is not null and col3 is not null;
24092409
select * from t use index (idx3) where col1 is not null and col3 is not null;
2410+
2411+
# TestNoNullIndexWithIndexMerge
2412+
drop table if exists t;
2413+
create table t (id int primary key, col1 int no_null_index, col2 int, col3 int);
2414+
alter table t add index idx2(col1, col2);
2415+
alter table t add index idx3(col1, col3);
2416+
insert into t values (2, null, 1, 1);
2417+
select * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
2418+
-- error 1815
2419+
select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
2420+
-- error 1815
2421+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and col2 < 100 and col3 < 100;
2422+
2423+
drop table if exists t;
2424+
create table t (id int primary key, col1 int no_null_index, col2 int no_null_index);
2425+
alter table t add index idx1(col1);
2426+
alter table t add index idx2(col2);
2427+
insert into t values (1, 1, NULL);
2428+
insert into t values (2, NULL, 1);
2429+
select /*+ USE_INDEX_MERGE(t, idx1, idx2) */ * FROM t WHERE col1 > 5 or col2 > 5;
2430+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx1, idx2) */ * FROM t WHERE col1 > 5 or col2 > 5;
2431+
2432+
drop table if exists t;
2433+
create table t (id int primary key, col1 int no_null_index, col2 json, col3 json);
2434+
alter table t add index idx2(col1, (CAST(col2->'$.path' AS SIGNED ARRAY)) );
2435+
alter table t add index idx3(col1, (CAST(col3->'$.path' AS SIGNED ARRAY)) );
2436+
insert into t values (2, null, '{"path":[1]}', '{"path":[1]}');
2437+
select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and json_contains(col2->'$.path', '1') and json_contains(col3->'$.path', '1');
2438+
explain format='brief' select /*+ USE_INDEX_MERGE(t, idx2, idx3) */ * FROM t WHERE col1 is null and json_contains(col2->'$.path', '1') and json_contains(col3->'$.path', '1');

0 commit comments

Comments
 (0)