Commit 78c20a0

br: adjust auto analyze (#60994)
ref #60374
Parent: c4ff9c3

4 files changed: +27, -48 lines


br/pkg/restore/snap_client/pipeline_items.go

Lines changed: 12 additions & 16 deletions
@@ -90,7 +90,6 @@ type PipelineContext struct {
 	LogProgress bool
 	ChecksumConcurrency uint
 	StatsConcurrency uint
-	AutoAnalyze bool

 	// pipeline item tool client
 	KvClient kv.Client
@@ -108,7 +107,9 @@ func (rc *SnapClient) RestorePipeline(ctx context.Context, plCtx PipelineContext
 	if plCtx.Checksum {
 		progressLen += int64(len(createdTables))
 	}
-	progressLen += int64(len(createdTables)) // for pipeline item - update stats meta
+	if plCtx.LoadStats {
+		progressLen += int64(len(createdTables))
+	}
 	if plCtx.WaitTiflashReady {
 		progressLen += int64(len(createdTables))
 	}
@@ -124,7 +125,9 @@ func (rc *SnapClient) RestorePipeline(ctx context.Context, plCtx PipelineContext
 	}

 	// pipeline update meta and load stats
-	rc.registerUpdateMetaAndLoadStats(handlerBuilder, plCtx.ExtStorage, updateCh, plCtx.StatsConcurrency, plCtx.AutoAnalyze, plCtx.LoadStats)
+	if plCtx.LoadStats {
+		rc.registerUpdateMetaAndLoadStats(handlerBuilder, plCtx.ExtStorage, updateCh, plCtx.StatsConcurrency)
+	}

 	// pipeline wait Tiflash synced
 	if plCtx.WaitTiflashReady {
@@ -312,13 +315,11 @@ const statsMetaItemBufferSize = 3000

 type statsMetaItemBuffer struct {
 	sync.Mutex
-	autoAnalyze bool
 	metaUpdates []statstypes.MetaUpdate
 }

-func NewStatsMetaItemBuffer(autoAnalyze bool) *statsMetaItemBuffer {
+func NewStatsMetaItemBuffer() *statsMetaItemBuffer {
 	return &statsMetaItemBuffer{
-		autoAnalyze: autoAnalyze,
 		metaUpdates: make([]statstypes.MetaUpdate, 0, statsMetaItemBufferSize),
 	}
 }
@@ -355,10 +356,7 @@ func (buffer *statsMetaItemBuffer) TryUpdateMetas(ctx context.Context, statsHand
 	item := statstypes.MetaUpdate{
 		PhysicalID: physicalID,
 		Count: count,
-		ModifyCount: 0,
-	}
-	if buffer.autoAnalyze {
-		item.ModifyCount = count
+		ModifyCount: count,
 	}
 	metaUpdates := buffer.appendItem(item)
 	if len(metaUpdates) == 0 {
@@ -372,16 +370,14 @@ func (rc *SnapClient) registerUpdateMetaAndLoadStats(
 	s storage.ExternalStorage,
 	updateCh glue.Progress,
 	statsConcurrency uint,
-	autoAnalyze bool,
-	loadStats bool,
 ) {
 	statsHandler := rc.dom.StatsHandle()
-	buffer := NewStatsMetaItemBuffer(autoAnalyze)
+	buffer := NewStatsMetaItemBuffer()

 	builder.RegisterPipelineTask("Update Stats", statsConcurrency, func(c context.Context, tbl *CreatedTable) error {
 		oldTable := tbl.OldTable
 		var statsErr error = nil
-		if loadStats && oldTable.Stats != nil {
+		if oldTable.Stats != nil {
 			log.Info("start loads analyze after validate checksum",
 				zap.Int64("old id", oldTable.Info.ID),
 				zap.Int64("new id", tbl.Table.ID),
@@ -395,7 +391,7 @@ func (rc *SnapClient) registerUpdateMetaAndLoadStats(
 				zap.Stringer("table", oldTable.Info.Name),
 				zap.Stringer("db", oldTable.DB.Name),
 				zap.Duration("cost", time.Since(start)))
-		} else if loadStats && len(oldTable.StatsFileIndexes) > 0 {
+		} else if len(oldTable.StatsFileIndexes) > 0 {
 			log.Info("start to load statistic data for each partition",
 				zap.Int64("old id", oldTable.Info.ID),
 				zap.Int64("new id", tbl.Table.ID),
@@ -411,7 +407,7 @@ func (rc *SnapClient) registerUpdateMetaAndLoadStats(
 				zap.Duration("cost", time.Since(start)))
 		}

-		if statsErr != nil || !loadStats || (oldTable.Stats == nil && len(oldTable.StatsFileIndexes) == 0) {
+		if statsErr != nil || (oldTable.Stats == nil && len(oldTable.StatsFileIndexes) == 0) {
 			// Not need to return err when failed because of update analysis-meta
 			log.Info("start update metas", zap.Stringer("table", oldTable.Info.Name), zap.Stringer("db", oldTable.DB.Name))
 			// get the the number of rows of each partition
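
The net effect of this file's changes: the per-restore autoAnalyze switch is gone, and whenever stats metadata is written back during restore, modify_count is set equal to count, which is the signal TiDB's auto-analyze priority queue looks at. Below is a minimal, standalone Go sketch of that buffering behaviour; the names mirror the diff (MetaUpdate, statsMetaItemBuffer), but the types are simplified stand-ins rather than the real statstypes/BR definitions.

// Standalone sketch, not the BR package itself.
package main

import (
	"fmt"
	"sync"
)

// MetaUpdate is a simplified stand-in for statstypes.MetaUpdate.
type MetaUpdate struct {
	PhysicalID  int64
	Count       int64
	ModifyCount int64
}

const statsMetaItemBufferSize = 3000

type statsMetaItemBuffer struct {
	sync.Mutex
	metaUpdates []MetaUpdate
}

func NewStatsMetaItemBuffer() *statsMetaItemBuffer {
	return &statsMetaItemBuffer{
		metaUpdates: make([]MetaUpdate, 0, statsMetaItemBufferSize),
	}
}

// appendItem buffers one update and returns the whole batch once the buffer is full.
func (b *statsMetaItemBuffer) appendItem(item MetaUpdate) []MetaUpdate {
	b.Lock()
	defer b.Unlock()
	b.metaUpdates = append(b.metaUpdates, item)
	if len(b.metaUpdates) < statsMetaItemBufferSize {
		return nil
	}
	batch := b.metaUpdates
	b.metaUpdates = make([]MetaUpdate, 0, statsMetaItemBufferSize)
	return batch
}

func main() {
	buffer := NewStatsMetaItemBuffer()
	// After this commit there is no autoAnalyze switch: ModifyCount is always
	// set to the row count, mirroring the new TryUpdateMetas body.
	item := MetaUpdate{PhysicalID: 101, Count: 9, ModifyCount: 9}
	_ = buffer.appendItem(item)
	fmt.Printf("buffered update: %+v\n", item)
}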

br/pkg/task/common_test.go

Lines changed: 0 additions & 1 deletion
@@ -316,7 +316,6 @@ func expectedDefaultRestoreConfig() RestoreConfig {
 		},
 		NoSchema: false,
 		LoadStats: true,
-		AutoAnalyze: true,
 		PDConcurrency: 0x1,
 		StatsConcurrency: 0xc,
 		BatchFlushInterval: 16000000000,

br/pkg/task/restore.go

Lines changed: 1 addition & 10 deletions
@@ -74,8 +74,6 @@ const (
 	FlagPDConcurrency = "pd-concurrency"
 	// FlagStatsConcurrency controls concurrency to restore statistic.
 	FlagStatsConcurrency = "stats-concurrency"
-	// FlagAutoAnalyze corresponds to the column `modify_count` of table `mysql.stats_meta`.
-	FlagAutoAnalyze = "auto-analyze"
 	// FlagBatchFlushInterval controls after how long the restore batch would be auto sended.
 	FlagBatchFlushInterval = "batch-flush-interval"
 	// FlagDdlBatchSize controls batch ddl size to create a batch of tables
@@ -174,7 +172,6 @@ func DefineRestoreCommonFlags(flags *pflag.FlagSet) {
 		"(deprecated) concurrency pd-relative operations like split & scatter.")
 	flags.Uint(FlagStatsConcurrency, defaultStatsConcurrency,
 		"concurrency to restore statistic")
-	flags.Bool(FlagAutoAnalyze, true, "trigger tidb analyze priority queue to analyze table")
 	flags.Duration(FlagBatchFlushInterval, defaultBatchFlushInterval,
 		"after how long a restore batch would be auto sent.")
 	flags.Uint(FlagDdlBatchSize, defaultFlagDdlBatchSize,
@@ -244,7 +241,6 @@ type RestoreConfig struct {
 	LoadStats bool `json:"load-stats" toml:"load-stats"`
 	PDConcurrency uint `json:"pd-concurrency" toml:"pd-concurrency"`
 	StatsConcurrency uint `json:"stats-concurrency" toml:"stats-concurrency"`
-	AutoAnalyze bool `json:"auto-analyze" toml:"auto-analyze"`
 	BatchFlushInterval time.Duration `json:"batch-flush-interval" toml:"batch-flush-interval"`
 	// DdlBatchSize use to define the size of batch ddl to create tables
 	DdlBatchSize uint `json:"ddl-batch-size" toml:"ddl-batch-size"`
@@ -299,7 +295,7 @@ func (cfg *RestoreConfig) LocalEncryptionEnabled() bool {
 // DefineRestoreFlags defines common flags for the restore tidb command.
 func DefineRestoreFlags(flags *pflag.FlagSet) {
 	flags.Bool(flagNoSchema, false, "skip creating schemas and tables, reuse existing empty ones")
-	flags.Bool(flagLoadStats, true, "Run load stats at end of snapshot restore task")
+	flags.Bool(flagLoadStats, true, "Run load stats or update stats_meta to trigger auto-analyze at end of snapshot restore task")
 	// Do not expose this flag
 	_ = flags.MarkHidden(flagNoSchema)
 	flags.String(FlagWithPlacementPolicy, "STRICT", "correspond to tidb global/session variable with-tidb-placement-mode")
@@ -409,10 +405,6 @@ func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig
 	if err != nil {
 		return errors.Annotatef(err, "failed to get flag %s", FlagStatsConcurrency)
 	}
-	cfg.AutoAnalyze, err = flags.GetBool(FlagAutoAnalyze)
-	if err != nil {
-		return errors.Annotatef(err, "failed to get flag %s", FlagAutoAnalyze)
-	}
 	cfg.BatchFlushInterval, err = flags.GetDuration(FlagBatchFlushInterval)
 	if err != nil {
 		return errors.Annotatef(err, "failed to get flag %s", FlagBatchFlushInterval)
@@ -1342,7 +1334,6 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s
 		LogProgress: cfg.LogProgress,
 		ChecksumConcurrency: cfg.ChecksumConcurrency,
 		StatsConcurrency: cfg.StatsConcurrency,
-		AutoAnalyze: cfg.AutoAnalyze,

 		KvClient: mgr.GetStorage().GetClient(),
 		ExtStorage: s,
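
With FlagAutoAnalyze removed, --load-stats is the single switch left: it now gates both loading backed-up statistics and the stats_meta update that triggers auto-analyze. A minimal standalone sketch of that flag wiring follows, using the same pflag calls as DefineRestoreFlags and ParseFromFlags; the flag name and help text come from the diff, everything else is illustrative.

// Standalone sketch, not BR's task package.
package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

const flagLoadStats = "load-stats"

func main() {
	flags := pflag.NewFlagSet("restore", pflag.ContinueOnError)
	flags.Bool(flagLoadStats, true,
		"Run load stats or update stats_meta to trigger auto-analyze at end of snapshot restore task")

	// Simulate a user passing --load-stats=false on the command line.
	if err := flags.Parse([]string{"--load-stats=false"}); err != nil {
		panic(err)
	}

	loadStats, err := flags.GetBool(flagLoadStats)
	if err != nil {
		panic(err)
	}
	// false: neither the stats load nor the stats_meta update pipeline item runs.
	fmt.Println("LoadStats:", loadStats)
}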

br/tests/br_stats/run.sh

Lines changed: 14 additions & 21 deletions
@@ -45,6 +45,8 @@ if [ "${dump_mark}" -ne "123" ]; then
     exit 1
 fi

+run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/${DB}2" --log-file $LOG --ignore-stats=true --filter "${DB}1.*"
+
 for i in $(seq $DB_COUNT); do
     run_sql "DROP DATABASE $DB${i};"
 done
@@ -62,43 +64,34 @@ if [ "${load_mark}" -ne "22" ]; then
     exit 1
 fi

+# test load stats is true but statistic data is not backed up
 run_sql "DROP DATABASE ${DB}1;"

 rm -f $LOG
-run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --log-file $LOG --load-stats=false --filter "${DB}1.*" || cat $LOG
-table_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.TABLES tables ON meta.table_id = tables.TIDB_TABLE_ID WHERE tables.TABLE_SCHEMA = '${DB}1' and modify_count = 0;" | awk '/count/{print $2}')
+run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/${DB}2" --log-file $LOG --load-stats=true --filter "${DB}1.br_stats_partition" || cat $LOG
+table_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.TABLES tables ON meta.table_id = tables.TIDB_TABLE_ID WHERE tables.TABLE_SCHEMA = '${DB}1' and modify_count = count and count > 0;" | awk '/count/{print $2}')
 if [ "${table_count}" -ne 9 ]; then
     echo "table stats meta count does not equal to 9, but $count instead"
     exit 1
 fi
-p0_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p0' and modify_count = 0;" | awk '/count/{print $2}')
+p0_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p0' and modify_count = count and count > 0;" | awk '/count/{print $2}')
 if [ "${p0_count}" -ne 5 ]; then
     echo "partition p0 stats meta count does not equal to 5, but $p0_count instead"
     exit 1
 fi
-p1_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p1' and modify_count = 0;" | awk '/count/{print $2}')
+p1_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p1' and modify_count = count and count > 0;" | awk '/count/{print $2}')
 if [ "${p1_count}" -ne 4 ]; then
     echo "partition p1 stats meta count does not equal to 4, but $p1_count instead"
     exit 1
 fi

-# test auto analyze
+# test load stats is false
 run_sql "DROP DATABASE ${DB}1;"

 rm -f $LOG
-run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --log-file $LOG --load-stats=false --filter "${DB}1.*" --auto-analyze || cat $LOG
-table_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.TABLES tables ON meta.table_id = tables.TIDB_TABLE_ID WHERE tables.TABLE_SCHEMA = '${DB}1' and modify_count = count and count > 0;" | awk '/count/{print $2}')
-if [ "${table_count}" -ne 9 ]; then
-    echo "table stats meta count does not equal to 9, but $count instead"
-    exit 1
-fi
-p0_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p0' and modify_count = count and count > 0;" | awk '/count/{print $2}')
-if [ "${p0_count}" -ne 5 ]; then
-    echo "partition p0 stats meta count does not equal to 5, but $p0_count instead"
-    exit 1
-fi
-p1_count=$(run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and parts.PARTITION_NAME = 'p1' and modify_count = count and count > 0;" | awk '/count/{print $2}')
-if [ "${p1_count}" -ne 4 ]; then
-    echo "partition p1 stats meta count does not equal to 4, but $p1_count instead"
-    exit 1
-fi
+run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --log-file $LOG --load-stats=false --filter "${DB}1.br_stats_partition" || cat $LOG
+run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.TABLES tables ON meta.table_id = tables.TIDB_TABLE_ID WHERE tables.TABLE_SCHEMA = '${DB}1' and modify_count = count and count > 0;"
+check_not_contains "1. row"
+
+run_sql "SELECT meta.count as count FROM mysql.stats_meta meta JOIN INFORMATION_SCHEMA.PARTITIONS parts ON meta.table_id = parts.TIDB_PARTITION_ID WHERE parts.TABLE_SCHEMA = '${DB}1' and modify_count = count and count > 0;"
+check_not_contains "1. row"
