Skip to content

Commit dcdd6ae

Browse files
terry1purcell and claude committed
cardinality: tighten full-range fast path for index estimation
Two fixes raised in the release-8.5 cherry-pick review (PR #66695) that also apply on master:

1. Move canSkipIndexEstimation ahead of IndexStatsIsInvalid so the full-range fast path short-circuits before IndexStatsIsInvalid queues an unnecessary async histogram load when the index stats are not fully loaded. Guarded by idx != nil so the pseudo fallback is preserved.

2. Reject ranges with LowExclude or HighExclude in isFullRangeIncludingNulls. A range like (NULL, +inf) drops the NULL endpoint and shrinks the result, so it must not trigger the fast path.

Tighten TestCanSkipIndexEstimation to actually exercise these cases: model 10 NULL rows in the index histogram (NullCount=10) so the not-null estimate is strictly below RealtimeCount, switch the assertion to require.Less, and add a case covering (NULL, +inf) with an exclusive lower bound.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 296420e commit dcdd6ae

2 files changed

Lines changed: 54 additions & 17 deletions

File tree

pkg/planner/cardinality/row_count_index.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
4343
sc := sctx.GetSessionVars().StmtCtx
4444
idx := coll.GetIdx(idxID)
4545
recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID)
46+
// Fast-path: a full-range scan over a non-MV, non-partial index returns exactly
47+
// RealtimeCount regardless of histogram availability, so we can short-circuit
48+
// before IndexStatsIsInvalid — which would otherwise queue an unnecessary async
49+
// histogram load whenever the index stats are not fully loaded.
50+
if idx != nil && canSkipIndexEstimation(idx, indexRanges) {
51+
realtimeCnt, _ := coll.GetScaledRealtimeAndModifyCnt(idx)
52+
return statistics.DefaultRowEst(float64(realtimeCnt)), nil
53+
}
4654
if statistics.IndexStatsIsInvalid(sctx, idx, coll, idxID) {
4755
if hasColumnStats(sctx, coll, idxCols) && !ranger.HasFullRange(indexRanges, false) {
4856
count, maxCount, err = getPseudoRowCountWithPartialStats(sctx, coll, indexRanges, float64(coll.RealtimeCount), idxCols)
@@ -58,9 +66,6 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
5866
return result, err
5967
}
6068
realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
61-
if canSkipIndexEstimation(idx, indexRanges) {
62-
return statistics.DefaultRowEst(float64(realtimeCnt)), nil
63-
}
6469
if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
6570
count, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges)
6671
result = statistics.DefaultRowEst(count)
@@ -608,12 +613,18 @@ func canSkipIndexEstimation(idx *statistics.Index, indexRanges []*ranger.Range)
608613
}
609614

610615
// isFullRangeIncludingNulls checks if a single range covers all values including NULLs.
611-
// Unlike ranger.IsFullRange, this requires the low bound to be NULL (KindNull),
612-
// not KindMinNotNull, ensuring NULL rows are included in the count.
616+
// Unlike ranger.IsFullRange, this requires the low bound to be NULL (KindNull) inclusive,
617+
// not KindMinNotNull and not an exclusive lower bound, so NULL rows are guaranteed to be
618+
// included in the count.
613619
func isFullRangeIncludingNulls(ran *ranger.Range) bool {
614620
if len(ran.LowVal) != len(ran.HighVal) || len(ran.LowVal) == 0 {
615621
return false
616622
}
623+
// An exclusive bound on NULL (low) or +inf (high) would drop those endpoints
624+
// and shrink the range, so the fast path must not apply.
625+
if ran.LowExclude || ran.HighExclude {
626+
return false
627+
}
617628
for i := range ran.LowVal {
618629
if ran.LowVal[i].Kind() != types.KindNull {
619630
return false

pkg/planner/cardinality/selectivity_test.go

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -549,29 +549,39 @@ func TestCanSkipIndexEstimation(t *testing.T) {
549549
tblInfo := tb.Meta()
550550

551551
// Use mock stats so Idx2ColUniqueIDs is populated (required by getIndexRowCountForStatsV2).
552-
realtimeCount := int64(51) // 50 rows + 1 NULL
552+
// 50 distinct non-NULL values + 10 NULL rows, so the not-null range produces a
553+
// strictly smaller estimate than RealtimeCount and catches a buggy fast-path that
554+
// returns RealtimeCount for [MinNotNull,+inf).
555+
const nonNullCount = 50
556+
const nullCount = 10
557+
realtimeCount := int64(nonNullCount + nullCount)
553558
statsTbl := mockStatsTable(tblInfo, realtimeCount)
554-
colValues, err := generateIntDatum(1, 51)
559+
colValues, err := generateIntDatum(1, nonNullCount)
555560
require.NoError(t, err)
556561
for i := 1; i <= 2; i++ {
562+
colHist := mockStatsHistogram(int64(i), colValues, 1, types.NewFieldType(mysql.TypeLonglong))
563+
colHist.NullCount = nullCount
557564
statsTbl.SetCol(int64(i), &statistics.Column{
558-
Histogram: *mockStatsHistogram(int64(i), colValues, 1, types.NewFieldType(mysql.TypeLonglong)),
565+
Histogram: *colHist,
559566
Info: tblInfo.Columns[i-1],
560567
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
561568
StatsVer: 2,
562569
})
563570
}
564571
// Index histogram must store encoded key bytes (same as getIndexRowCountForStatsV2 uses for l/r).
565-
idxValues := make([]types.Datum, 51)
566-
for i := 0; i < 51; i++ {
572+
idxValues := make([]types.Datum, nonNullCount)
573+
for i := range idxValues {
567574
enc, err := codec.EncodeKey(time.UTC, nil, types.NewIntDatum(int64(i)))
568575
require.NoError(t, err)
569576
idxValues[i].SetBytes(enc)
570577
}
578+
idxHist := mockStatsHistogram(tblInfo.Indices[0].ID, idxValues, 1, types.NewFieldType(mysql.TypeBlob))
579+
idxHist.NullCount = nullCount
571580
statsTbl.SetIdx(tblInfo.Indices[0].ID, &statistics.Index{
572-
Histogram: *mockStatsHistogram(tblInfo.Indices[0].ID, idxValues, 1, types.NewFieldType(mysql.TypeBlob)),
573-
Info: tblInfo.Indices[0],
574-
StatsVer: 2,
581+
Histogram: *idxHist,
582+
Info: tblInfo.Indices[0],
583+
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
584+
StatsVer: 2,
575585
})
576586
generateMapsForMockStatsTbl(statsTbl)
577587

@@ -586,20 +596,36 @@ func TestCanSkipIndexEstimation(t *testing.T) {
586596
require.Equal(t, float64(realtimeCount), countResult.Est,
587597
"full range [NULL,+inf) should use fast path and return RealtimeCount")
588598

589-
// Full range excluding NULLs [MinNotNull, +inf) should NOT use fast path.
590-
// It goes through histogram estimation.
599+
// Full range excluding NULLs [MinNotNull, +inf) must NOT use the fast path.
600+
// With nullCount > 0, the histogram estimate must be strictly below RealtimeCount;
601+
// equality would mean the fast path was wrongly taken.
591602
fullNotNullRanges := ranger.FullNotNullRange()
592603
countResult2, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, fullNotNullRanges, nil)
593604
require.NoError(t, err)
594-
require.LessOrEqual(t, countResult2.Est, float64(realtimeCount),
595-
"full not-null range excludes NULLs, estimate should be <= RealtimeCount")
605+
require.Less(t, countResult2.Est, float64(realtimeCount),
606+
"full not-null range excludes %d NULL row(s), estimate must be < RealtimeCount", nullCount)
596607

597608
// Bounded range should NOT use fast path.
598609
boundedRanges := getRange(1, 10)
599610
countResult3, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, boundedRanges, nil)
600611
require.NoError(t, err)
601612
require.Less(t, countResult3.Est, float64(realtimeCount),
602613
"bounded range should use histogram estimation, not fast path")
614+
615+
// (NULL, +inf) with an exclusive lower bound drops the NULL endpoint, so the
616+
// fast path must not apply — otherwise the NULL rows would be silently counted.
617+
var nullDatum types.Datum
618+
nullDatum.SetNull()
619+
exclusiveNullRanges := []*ranger.Range{{
620+
LowVal: []types.Datum{nullDatum},
621+
HighVal: []types.Datum{types.MaxValueDatum()},
622+
LowExclude: true,
623+
Collators: collate.GetBinaryCollatorSlice(1),
624+
}}
625+
countResult4, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, exclusiveNullRanges, nil)
626+
require.NoError(t, err)
627+
require.Less(t, countResult4.Est, float64(realtimeCount),
628+
"exclusive lower bound on NULL must drop the NULL row, estimate must be < RealtimeCount")
603629
}
604630

605631
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {

0 commit comments

Comments
 (0)