Skip to content

Commit d65f4df

Browse files
authored
pkg/planner: remove redundant visited table state in stats usage collector (#68166)
close #67912
1 parent 4598d48 commit d65f4df

1 file changed

Lines changed: 19 additions & 26 deletions

File tree

pkg/planner/core/rule/collect_column_stats_usage.go

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,9 @@ type columnStatsUsageCollector struct {
4545
// cols is used to store columns collected from expressions and saves some allocation.
4646
cols []*expression.Column
4747

48-
// visitedPhysTblIDs all ds.PhysicalTableID that have been visited.
49-
// It's always collected, even collectHistNeededColumns is not set.
50-
visitedPhysTblIDs *intset.FastIntSet
51-
52-
// collectVisitedTable indicates whether to collect visited table
53-
collectVisitedTable bool
54-
// visitedtbls indicates the visited table
55-
visitedtbls map[int64]struct{}
56-
48+
// visitedLogicalTblIDs is always collected for stats-load logic and reused by plan replayer capture.
49+
// It currently stores the logical table ID (DataSource.TableInfo.ID), not ds.PhysicalTableID.
50+
visitedLogicalTblIDs *intset.FastIntSet
5751
// tblID2PartitionIDs is used for tables with static pruning mode.
5852
// Note that we no longer suggest using static pruning mode.
5953
tblID2PartitionIDs map[int64][]int64
@@ -67,20 +61,16 @@ type columnStatsUsageCollector struct {
6761
colSet map[int64]struct{}
6862
}
6963

70-
func newColumnStatsUsageCollector(enabledPlanCapture bool, collectIndexPruningCols bool) *columnStatsUsageCollector {
64+
func newColumnStatsUsageCollector(collectIndexPruningCols bool) *columnStatsUsageCollector {
7165
set := intset.NewFastIntSet()
7266
collector := &columnStatsUsageCollector{
7367
// Pre-allocate a slice to reduce allocation, 8 doesn't have special meaning.
74-
cols: make([]*expression.Column, 0, 8),
75-
visitedPhysTblIDs: &set,
76-
tblID2PartitionIDs: make(map[int64][]int64),
68+
cols: make([]*expression.Column, 0, 8),
69+
visitedLogicalTblIDs: &set,
70+
tblID2PartitionIDs: make(map[int64][]int64),
7771
}
7872
collector.predicateCols = make(map[model.TableItemID]bool)
7973
collector.colMap = make(map[int64]map[model.TableItemID]struct{})
80-
if enabledPlanCapture {
81-
collector.collectVisitedTable = true
82-
collector.visitedtbls = map[int64]struct{}{}
83-
}
8474
if collectIndexPruningCols {
8575
collector.interestingColsByDS = make(map[*logicalop.DataSource][]*expression.Column)
8676
collector.colSet = make(map[int64]struct{})
@@ -145,10 +135,7 @@ func (c *columnStatsUsageCollector) collectPredicateColumnsForDataSource(askedCo
145135
// For partition tables, no matter whether it is static or dynamic pruning mode, we use table ID rather than partition ID to
146136
// set TableColumnID.TableID. In this way, we keep the set of predicate columns consistent between different partitions and global table.
147137
tblID := ds.TableInfo.ID
148-
if c.collectVisitedTable {
149-
c.visitedtbls[tblID] = struct{}{}
150-
}
151-
c.visitedPhysTblIDs.Insert(int(tblID))
138+
c.visitedLogicalTblIDs.Insert(int(tblID))
152139
if tblID != ds.PhysicalTableID {
153140
c.tblID2PartitionIDs[tblID] = append(c.tblID2PartitionIDs[tblID], ds.PhysicalTableID)
154141
}
@@ -428,7 +415,8 @@ func (c *columnStatsUsageCollector) collectFromPlan(askedColGroups [][]*expressi
428415
// predicate indicates whether to collect predicate columns and histNeeded indicates whether to collect histogram-needed columns.
429416
// The predicate columns are always collected while the histNeeded columns are depending on whether we use sync load.
430417
// First return value: predicate columns
431-
// Second return value: the visited table IDs(For partition table, we only record its global meta ID. The meta ID of each partition will be recorded in tblID2PartitionIDs)
418+
// Second return value: visited logical table IDs. For partitioned tables, we only record logical table IDs;
419+
// each partition's physical table ID is recorded in tblID2PartitionIDs.
432420
// Third return value: the visited partition IDs. Used for static partition pruning.
433421
// Fourth return value: the number of operators in the logical plan.
434422
// TODO: remove the third return value once static partition pruning is fully deprecated.
@@ -442,10 +430,15 @@ func CollectColumnStatsUsage(lp base.LogicalPlan) (
442430
threshold := lp.SCtx().GetSessionVars().OptIndexPruneThreshold
443431
collectIndexPruningCols := threshold >= 0
444432

445-
collector := newColumnStatsUsageCollector(lp.SCtx().GetSessionVars().IsPlanReplayerCaptureEnabled(), collectIndexPruningCols)
433+
enablePlanCapture := lp.SCtx().GetSessionVars().IsPlanReplayerCaptureEnabled()
434+
collector := newColumnStatsUsageCollector(collectIndexPruningCols)
446435
collector.collectFromPlan(nil, lp, nil, nil)
447-
if collector.collectVisitedTable {
448-
recordTableRuntimeStats(lp.SCtx(), collector.visitedtbls)
436+
if enablePlanCapture {
437+
visitedTbls := make(map[int64]struct{}, collector.visitedLogicalTblIDs.Len())
438+
collector.visitedLogicalTblIDs.ForEach(func(tblID int) {
439+
visitedTbls[int64(tblID)] = struct{}{}
440+
})
441+
recordTableRuntimeStats(lp.SCtx(), visitedTbls)
449442
}
450443

451444
// Populate DataSource field with the collected interesting columns (if index pruning is enabled)
@@ -455,5 +448,5 @@ func CollectColumnStatsUsage(lp base.LogicalPlan) (
455448
}
456449
}
457450

458-
return collector.predicateCols, collector.visitedPhysTblIDs, collector.tblID2PartitionIDs, collector.operatorNum
451+
return collector.predicateCols, collector.visitedLogicalTblIDs, collector.tblID2PartitionIDs, collector.operatorNum
459452
}

0 commit comments

Comments
 (0)