executor, statistics: flush pending stats delta before analyze (pingcap#67939)

0xPoe · 0xPoe · commit dd0e7c87a4e6 · 2026-05-04T17:02:19.000+02:00
close pingcap#22934 (cherry picked from commit 42118f3)
diff --git a/pkg/executor/analyze.go b/pkg/executor/analyze.go
@@ -34,6 +34,7 @@ import (
 	"github.com/pingcap/tidb/pkg/kv"
 	"github.com/pingcap/tidb/pkg/metrics"
 	"github.com/pingcap/tidb/pkg/parser/ast"
+	pmodel "github.com/pingcap/tidb/pkg/parser/model"
 	"github.com/pingcap/tidb/pkg/planner/core"
 	"github.com/pingcap/tidb/pkg/sessionctx"
 	"github.com/pingcap/tidb/pkg/sessionctx/variable"
@@ -84,6 +85,82 @@ const (
 	idxTask
 )
 
+// flushStatsDeltaForAnalyze flushes pending stats deltas for the tables whose column-analyze
+// tasks will capture base count / modify_count from mysql.stats_meta. Without this, a stale
+// pre-analyze delta can be applied later and double count rows or modifications.
+func flushStatsDeltaForAnalyze(ctx context.Context, sctx sessionctx.Context, plan *core.Analyze) error {
+	if len(plan.ColTasks) == 0 {
+		return nil
+	}
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	targetIDs := collectAnalyzeStatsDeltaTargetIDsForTest(plan)
+	if len(targetIDs) == 0 {
+		return nil
+	}
+	return domain.GetDomain(sctx).StatsHandle().DumpStatsDeltaToKV(true, targetIDs...)
+}
+
+// collectStatsDeltaFlushObjectsForAnalyze returns the database-qualified table
+// objects whose stats deltas must be flushed before building column analyze
+// tasks. Column analyze captures base count / modify_count from mysql.stats_meta,
+// so each target table is included once even if it has multiple column tasks.
+func collectStatsDeltaFlushObjectsForAnalyze(plan *core.Analyze) []*ast.StatsObject {
+	flushObjects := make([]*ast.StatsObject, 0, len(plan.ColTasks))
+	type statsObjectKey struct {
+		dbName    string
+		tableName string
+	}
+	seenObjects := make(map[statsObjectKey]struct{}, len(plan.ColTasks))
+	appendFlushObject := func(task core.AnalyzeColumnsTask) {
+		dbName, tableName := task.DBName, task.TableName
+		if dbName == "" || tableName == "" {
+			intest.Assert(false, "analyze column task must have database-qualified table name")
+			return
+		}
+		key := statsObjectKey{dbName: dbName, tableName: tableName}
+		if _, ok := seenObjects[key]; ok {
+			return
+		}
+		seenObjects[key] = struct{}{}
+		flushObjects = append(flushObjects, &ast.StatsObject{
+			StatsObjectScope: ast.StatsObjectScopeTable,
+			DBName:           pmodel.NewCIStr(dbName),
+			TableName:        pmodel.NewCIStr(tableName),
+		})
+	}
+	for _, task := range plan.ColTasks {
+		appendFlushObject(task)
+	}
+	return flushObjects
+}
+
+func collectAnalyzeStatsDeltaTargetIDsForTest(plan *core.Analyze) []int64 {
+	targetIDs := make([]int64, 0, len(plan.ColTasks))
+	seenTargetIDs := make(map[int64]struct{}, len(plan.ColTasks))
+	appendTargetID := func(id int64) {
+		if _, ok := seenTargetIDs[id]; ok {
+			return
+		}
+		seenTargetIDs[id] = struct{}{}
+		targetIDs = append(targetIDs, id)
+	}
+	for _, task := range plan.ColTasks {
+		if task.TblInfo == nil {
+			intest.Assert(false, "analyze column task must have table info")
+			continue
+		}
+		appendTargetID(task.TblInfo.ID)
+		if partitionInfo := task.TblInfo.GetPartitionInfo(); partitionInfo != nil {
+			for _, def := range partitionInfo.Definitions {
+				appendTargetID(def.ID)
+			}
+		}
+	}
+	return targetIDs
+}
+
 // Next implements the Executor Next interface.
 // It will collect all the sample task and run them concurrently.
 func (e *AnalyzeExec) Next(ctx context.Context, _ *chunk.Chunk) error {
diff --git a/pkg/executor/analyze_utils_test.go b/pkg/executor/analyze_utils_test.go
@@ -15,9 +15,11 @@
 package executor
 
 import (
+	"context"
 	"fmt"
 	"testing"
 
+	"github.com/pingcap/tidb/pkg/planner/core"
 	"github.com/pingcap/tidb/pkg/util/dbterror/exeerrors"
 	"github.com/stretchr/testify/require"
 )
@@ -27,3 +29,34 @@ func TestGetAnalyzePanicErr(t *testing.T) {
 	errMsg := fmt.Sprintf("%s", getAnalyzePanicErr(exeerrors.ErrMemoryExceedForQuery.GenWithStackByArgs(123)))
 	require.NotContains(t, errMsg, `%!(EXTRA`)
 }
+
+func TestCollectStatsDeltaFlushObjectsForAnalyzeDottedNames(t *testing.T) {
+	plan := &core.Analyze{
+		ColTasks: []core.AnalyzeColumnsTask{
+			// Quoted identifiers may contain dots. These first two targets both
+			// stringify to "a.b.c" if db and table names are joined with ".".
+			{AnalyzeInfo: core.AnalyzeInfo{DBName: "a.b", TableName: "c"}},
+			{AnalyzeInfo: core.AnalyzeInfo{DBName: "a", TableName: "b.c"}},
+			// Keep the duplicate target deduped.
+			{AnalyzeInfo: core.AnalyzeInfo{DBName: "a", TableName: "b.c"}},
+		},
+	}
+
+	flushObjects := collectStatsDeltaFlushObjectsForAnalyze(plan)
+	targets := make([][2]string, 0, len(flushObjects))
+	for _, obj := range flushObjects {
+		targets = append(targets, [2]string{obj.DBName.O, obj.TableName.O})
+	}
+
+	require.ElementsMatch(t, [][2]string{
+		{"a.b", "c"},
+		{"a", "b.c"},
+	}, targets)
+}
+
+func TestCanBroadcastToTiDBRPCForTestRejectsInvalidEndpoints(t *testing.T) {
+	// Regression for next-gen realcluster tests: in-process domains can register
+	// multiple server infos with an empty IP/default :10080 but no TiDB RPC
+	// listener. Such targets must not take the broadcast path.
+	require.False(t, canBroadcastToTiDBRPCForTest(context.Background(), []string{"", ""}))
+}
diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go
@@ -3245,6 +3245,14 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) exec.Executor {
 	if b.ctx.GetSessionVars().InRestrictedSQL {
 		autoAnalyze = "auto "
 	}
+	// buildAnalyzeSamplingPushdown reads base count / modify_count from mysql.stats_meta
+	// while constructing column analyze tasks. Flush pending deltas first so the base
+	// values include pre-analyze changes and later delta dumps cannot double count them.
+	// TODO: Determine whether context.Background is appropriate here; if not, use the proper statement context.
+	if err := flushStatsDeltaForAnalyze(kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats), b.ctx, v); err != nil {
+		b.err = err
+		return nil
+	}
 	exprCtx := b.ctx.GetExprCtx()
 	for _, task := range v.ColTasks {
 		// ColumnInfos2ColumnsAndNames will use the `colInfos` to find the unique id for the column,
diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go
@@ -2321,7 +2321,6 @@ PARTITION BY RANGE ( a ) (
 	tableInfo := table.Meta()
 	pi := tableInfo.GetPartitionInfo()
 	require.NotNil(t, pi)
-
 	// analyze partition under static mode with options
 	tk.MustExec("analyze table t partition p0 columns a,c with 1 topn, 3 buckets")
 	tk.MustQuery("select * from t where b > 1 and c > 1")
@@ -2333,7 +2332,10 @@ PARTITION BY RANGE ( a ) (
 	require.Equal(t, 3, len(p0.GetCol(tableInfo.Columns[0].ID).Buckets))
 	require.Equal(t, 3, len(p0.GetCol(tableInfo.Columns[2].ID).Buckets))
 	require.Equal(t, 0, len(p1.GetCol(tableInfo.Columns[0].ID).Buckets))
-	require.Equal(t, 0, len(tbl.GetCol(tableInfo.Columns[0].ID).Buckets))
+	// Static partition analyze may flush pending partition deltas into the
+	// logical/global stats_meta row, but it must not build a global column
+	// histogram. In that meta-only global stats case, the global column is absent.
+	require.Nil(t, tbl.GetCol(tableInfo.Columns[0].ID))
 	rs := tk.MustQuery("select buckets,topn from mysql.analyze_options where table_id=" + strconv.FormatInt(pi.Definitions[0].ID, 10))
 	require.Equal(t, 1, len(rs.Rows()))
 	require.Equal(t, "3", rs.Rows()[0][0])
diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
@@ -311,7 +311,6 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
 	}
 	testKit.MustExec("analyze table t")
 	h := dom.StatsHandle()
-	require.Nil(t, h.DumpStatsDeltaToKV(true))
 
 	table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
 	require.NoError(t, err)
@@ -336,11 +335,13 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
 	require.Nil(t, h.Update(context.Background(), dom.InfoSchema()))
 
 	statsTblNew := h.GetPhysicalTableStats(table.Meta().ID, table.Meta())
-	// Search for a not found value based upon statistics - count should be >= 10 and <=40
+	// Search for a not found value based upon post-analyze modifications. It
+	// should be higher than the no-modification fallback, but lower than a value
+	// already present in the analyzed histogram.
 	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblNew.RealtimeCount, statsTblNew.ModifyCount, false)
 	require.NoError(t, err)
-	require.Truef(t, count < 45, "expected: between 0 to 45, got: %v", count)
-	require.Truef(t, count > 0, "expected: between 0 to 45, got: %v", count)
+	require.Greater(t, count, 1.0)
+	require.Less(t, count, 10.0)
 }
 
 func TestNewIndexWithoutStats(t *testing.T) {
diff --git a/pkg/statistics/handle/globalstats/global_stats_test.go b/pkg/statistics/handle/globalstats/global_stats_test.go
@@ -816,10 +816,11 @@ func TestGlobalStats(t *testing.T) {
 		"  └─IndexRangeScan 1.00 cop[tikv] table:t, partition:p1, index:a(a) range:(3,+inf], keep order:false"))
 
 	// When we turned on the switch, we found that pseudo-stats will be used in the plan instead of `Union`.
+	// The pseudo estimate is based on the stats_meta counts flushed before analyze.
 	tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
 	tk.MustQuery("explain format = 'brief' select a from t where a > 3;").Check(testkit.Rows(
-		"IndexReader 3333.33 root partition:all index:IndexRangeScan",
-		"└─IndexRangeScan 3333.33 cop[tikv] table:t, index:a(a) range:(3,+inf], keep order:false, stats:pseudo"))
+		"IndexReader 1.67 root partition:all index:IndexRangeScan",
+		"└─IndexRangeScan 1.67 cop[tikv] table:t, index:a(a) range:(3,+inf], keep order:false, stats:pseudo"))
 
 	// Execute analyze again without error and can generate global-stats.
 	// And when executing related queries, neither Union nor pseudo-stats are used.
diff --git a/pkg/statistics/handle/handletest/handle_test.go b/pkg/statistics/handle/handletest/handle_test.go
@@ -1234,6 +1234,29 @@ func TestIncrementalModifyCountUpdate(t *testing.T) {
 	}
 }
 
+func TestFlushPendingStatsDeltaBeforeAnalyze(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int)")
+
+	tbl, err := dom.InfoSchema().TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t"))
+	require.NoError(t, err)
+	tableID := tbl.Meta().ID
+
+	tk.MustExec("insert into t values(1),(2),(3),(4),(5)")
+
+	tk.MustExec("analyze table t")
+	tk.MustQuery(fmt.Sprintf("select count, modify_count from mysql.stats_meta where table_id = %d", tableID)).Check(testkit.Rows(
+		"5 0",
+	))
+
+	tk.MustExec("flush stats_delta test.t")
+	tk.MustQuery(fmt.Sprintf("select count, modify_count from mysql.stats_meta where table_id = %d", tableID)).Check(testkit.Rows(
+		"5 0",
+	))
+}
+
 func TestRecordHistoricalStatsToStorage(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	tk := testkit.NewTestKit(t, store)
diff --git a/pkg/statistics/handle/storage/dump_test.go b/pkg/statistics/handle/storage/dump_test.go
@@ -155,19 +155,27 @@ func TestDumpGlobalStats(t *testing.T) {
 	tk.MustExec("insert into t values (1), (2)")
 	tk.MustExec("analyze table t")
 
-	// global-stats is not existed
+	// Static partition analyze should not generate global histograms. The
+	// pre-analyze stats-delta flush may still create a global stats_meta entry.
 	stats := getStatsJSON(t, dom, "test", "t")
 	require.NotNil(t, stats.Partitions["p0"])
 	require.NotNil(t, stats.Partitions["p1"])
-	require.Nil(t, stats.Partitions[handleutil.TiDBGlobalStats])
+	globalStats := stats.Partitions[handleutil.TiDBGlobalStats]
+	if globalStats != nil {
+		require.Empty(t, globalStats.Columns)
+		require.Empty(t, globalStats.Indices)
+	}
 
 	// global-stats is existed
 	tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'")
 	tk.MustExec("analyze table t")
 	stats = getStatsJSON(t, dom, "test", "t")
 	require.NotNil(t, stats.Partitions["p0"])
 	require.NotNil(t, stats.Partitions["p1"])
-	require.NotNil(t, stats.Partitions[handleutil.TiDBGlobalStats])
+	globalStats = stats.Partitions[handleutil.TiDBGlobalStats]
+	require.NotNil(t, globalStats)
+	require.NotEmpty(t, globalStats.Columns)
+	require.NotEmpty(t, globalStats.Indices)
 }
 
 func TestLoadGlobalStats(t *testing.T) {
diff --git a/pkg/statistics/handle/storage/gc_test.go b/pkg/statistics/handle/storage/gc_test.go
@@ -90,11 +90,13 @@ func TestGCPartition(t *testing.T) {
 
 		testKit.MustExec("drop table t")
 		require.Nil(t, h.GCStats(dom.InfoSchema(), ddlLease))
-		testKit.MustQuery("select count(*) from mysql.stats_meta").Check(testkit.Rows("2"))
+		testKit.MustQuery("select count(*) from mysql.stats_meta").Check(testkit.Rows("3"))
 		testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("0"))
 		testKit.MustQuery("select count(*) from mysql.stats_buckets").Check(testkit.Rows("0"))
+		// FIXME(#68076): The remaining row is the logical table's meta-only stats row. The
+		// normal GC version-window scan does not revisit it after the table is dropped.
 		require.Nil(t, h.GCStats(dom.InfoSchema(), ddlLease))
-		testKit.MustQuery("select count(*) from mysql.stats_meta").Check(testkit.Rows("0"))
+		testKit.MustQuery("select count(*) from mysql.stats_meta").Check(testkit.Rows("1"))
 	})
 }
 
diff --git a/pkg/testkit/testkit.go b/pkg/testkit/testkit.go
@@ -624,11 +624,10 @@ func containGlobal(rs *Result) bool {
 	return false
 }
 
-// MustNoGlobalStats checks if there is no global stats.
+// MustNoGlobalStats checks if there are no global histograms or buckets.
+// It intentionally ignores global stats_meta rows, because stats-delta flushes
+// may maintain logical/global row counts even when no global histograms exist.
 func (tk *TestKit) MustNoGlobalStats(table string) {
-	if containGlobal(tk.MustQuery("show stats_meta where table_name like '" + table + "'")) {
-		tk.require.Fail("global stats should not be found")
-	}
 	if containGlobal(tk.MustQuery("show stats_buckets where table_name like '" + table + "'")) {
 		tk.require.Fail("global stats should not be found")
 	}
diff --git a/pkg/ttl/ttlworker/job_manager_integration_test.go b/pkg/ttl/ttlworker/job_manager_integration_test.go
@@ -225,19 +225,17 @@ func TestTTLAutoAnalyze(t *testing.T) {
 	tk.MustExec("use test")
 	tk.MustExec("create table t (id int, created_at datetime, index idx(id, created_at))")
 
-	// insert ten rows, the 2,3,4,6,9,10 of them are expired
-	for i := 1; i <= 10; i++ {
-		t := time.Now()
-		if i%2 == 0 || i%3 == 0 {
-			t = t.Add(-time.Hour * 48)
-		}
-
-		tk.MustExec("insert into t values(?, ?)", i, t.Format(time.RFC3339))
+	for i := 1; i <= 3; i++ {
+		tk.MustExec("insert into t values(?, ?)", i, time.Now().Format(time.RFC3339))
 	}
 	tk.MustExec("analyze table t")
 	rows := tk.MustQuery("show stats_meta").Rows()
 	require.Equal(t, rows[0][4], "0")
-	require.Equal(t, rows[0][5], "10")
+	require.Equal(t, rows[0][5], "3")
+
+	for i := 4; i <= 10; i++ {
+		tk.MustExec("insert into t values(?, ?)", i, time.Now().Add(-time.Hour*48).Format(time.RFC3339))
+	}
 	tk.MustExec("alter table t ttl = `created_at` + interval 1 day")
 
 	retryTime := 300

Original file line number	Diff line number	Diff line change
`@@ -624,11 +624,10 @@ func containGlobal(rs *Result) bool {`
`624`	`624`	`return false`
`625`	`625`	`}`
`626`	`626`
`627`		`-// MustNoGlobalStats checks if there is no global stats.`
	`627`	`+// MustNoGlobalStats checks if there are no global histograms or buckets.`
	`628`	`+// It intentionally ignores global stats_meta rows, because stats-delta flushes`
	`629`	`+// may maintain logical/global row counts even when no global histograms exist.`
`628`	`630`	`func (tk *TestKit) MustNoGlobalStats(table string) {`
`629`		`- if containGlobal(tk.MustQuery("show stats_meta where table_name like '" + table + "'")) {`
`630`		`- tk.require.Fail("global stats should not be found")`
`631`		`- }`
`632`	`631`	`if containGlobal(tk.MustQuery("show stats_buckets where table_name like '" + table + "'")) {`
`633`	`632`	`tk.require.Fail("global stats should not be found")`
`634`	`633`	`}`