Skip to content

Commit 2b9527b

Browse files
authored
*: bump tipb and refactor analyze sampling helpers (#68414)
ref #67449
1 parent f1b85a7 commit 2b9527b

5 files changed

Lines changed: 44 additions & 29 deletions

File tree

DEPS.bzl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6582,13 +6582,13 @@ def go_deps():
65826582
name = "com_github_pingcap_tipb",
65836583
build_file_proto_mode = "disable_global",
65846584
importpath = "github.com/pingcap/tipb",
6585-
sha256 = "68768a27ed6c35716fcb01a0b4a15ff13e5c1a5dc11acc7a3d44ba02a2742077",
6586-
strip_prefix = "github.com/pingcap/tipb@v0.0.0-20260414032333-da912b84de6f",
6585+
sha256 = "a47ec816b2fa1924a4db5c2270a3bfb70f7c5bcc790b59287b5e9680b71bfbcd",
6586+
strip_prefix = "github.com/pingcap/tipb@v0.0.0-20260515142222-a4d204a193b4",
65876587
urls = [
6588-
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip",
6589-
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip",
6590-
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip",
6591-
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip",
6588+
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip",
6589+
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip",
6590+
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip",
6591+
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip",
65926592
],
65936593
)
65946594
go_repository(

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ require (
106106
github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d
107107
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5
108108
github.com/pingcap/tidb/pkg/parser v0.0.0-20211011031125-9b13dc409c5e
109-
github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f
109+
github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4
110110
github.com/prometheus/client_golang v1.23.0
111111
github.com/prometheus/client_model v0.6.2
112112
github.com/prometheus/common v0.65.0
@@ -352,7 +352,7 @@ require (
352352
google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect
353353
google.golang.org/protobuf v1.36.10
354354
gopkg.in/inf.v0 v0.9.1 // indirect
355-
gopkg.in/natefinch/lumberjack.v2 v2.2.1
355+
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
356356
gopkg.in/yaml.v3 v3.0.1 // indirect
357357
k8s.io/apimachinery v0.29.11 // indirect
358358
k8s.io/klog/v2 v2.120.1 // indirect

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,8 @@ github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d h1:5JCgncG9X7
738738
github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d/go.mod h1:HMNxmg0/lrn3SPGJ6LTZqP0WwEpcXMu9s/4TWJbzT8w=
739739
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeAhmOQHNPRRogMYxP13V7BkYw3ZsoSfE=
740740
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530=
741-
github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f h1:+IEEq1wl/kxfGK/qOCe9Bu0Kk9ERqxrzeGoKazevWrw=
742-
github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f/go.mod h1:RM8iRcMalzOthG2XJxnNBniM4xFGb/lDwHUwqkaVzt4=
741+
github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4 h1:7kN995aOhNamG8IOnN7Rj6nNqq+F3Z2AyfPGjCNdqoI=
742+
github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4/go.mod h1:RM8iRcMalzOthG2XJxnNBniM4xFGb/lDwHUwqkaVzt4=
743743
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
744744
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
745745
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=

pkg/executor/analyze_col_sampling.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,9 @@ func (e *AnalyzeColumnsExec) buildSamplingStats(
217217
}
218218
}()
219219

220-
l := len(e.analyzePB.ColReq.ColumnsInfo) + len(e.analyzePB.ColReq.ColumnGroups)
221-
rootRowCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l)
222-
for range l {
220+
totalLen := len(e.analyzePB.ColReq.ColumnsInfo) + len(e.analyzePB.ColReq.ColumnGroups)
221+
rootRowCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen)
222+
for range totalLen {
223223
rootRowCollector.Base().FMSketches = append(rootRowCollector.Base().FMSketches, statistics.NewFMSketch(statistics.MaxSketchSize))
224224
}
225225

@@ -251,7 +251,7 @@ func (e *AnalyzeColumnsExec) buildSamplingStats(
251251
for i := range samplingStatsConcurrency {
252252
id := i
253253
gp.Go(func() {
254-
e.subMergeWorker(mergeCtx, taskCancel, mergeResultCh, mergeTaskCh, l, id)
254+
e.subMergeWorker(mergeCtx, taskCancel, mergeResultCh, mergeTaskCh, totalLen, id)
255255
})
256256
}
257257
// Merge the result from collectors.
@@ -342,7 +342,6 @@ func (e *AnalyzeColumnsExec) buildSamplingStats(
342342
return i.Handle.Compare(j.Handle)
343343
})
344344

345-
totalLen := len(e.colsInfo) + len(e.indexes)
346345
hists = make([]*statistics.Histogram, totalLen)
347346
topns = make([]*statistics.TopN, totalLen)
348347
fmSketches = make([]*statistics.FMSketch, 0, totalLen)
@@ -602,7 +601,7 @@ func (e *AnalyzeColumnsExec) subMergeWorker(
602601
cancel context.CancelCauseFunc,
603602
resultCh chan<- *samplingMergeResult,
604603
taskCh <-chan []byte,
605-
l int,
604+
totalLen int,
606605
index int,
607606
) {
608607
// Only close the resultCh in the first worker.
@@ -640,8 +639,8 @@ func (e *AnalyzeColumnsExec) subMergeWorker(
640639
}
641640
})
642641
// Keep one private collector per merge worker and flush it when taskCh is closed.
643-
retCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l)
644-
for range l {
642+
retCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen)
643+
for range totalLen {
645644
retCollector.Base().FMSketches = append(retCollector.Base().FMSketches, statistics.NewFMSketch(statistics.MaxSketchSize))
646645
}
647646
// Early-return paths need to release the worker-local collector explicitly.
@@ -671,7 +670,7 @@ func (e *AnalyzeColumnsExec) subMergeWorker(
671670
inflightRespSize = int64(colResp.Size())
672671
e.memTracker.Consume(inflightRespSize)
673672

674-
subCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l)
673+
subCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen)
675674
subCollector.Base().FromProto(colResp.RowCollector, e.memTracker)
676675
statsHandle.UpdateAnalyzeJobProgress(e.job, subCollector.Base().Count)
677676

pkg/statistics/fmsketch.go

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -121,24 +121,41 @@ func (s *FMSketch) insertHashValue(hashVal uint64) {
121121

122122
// InsertValue inserts a value into the FM sketch.
123123
func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error {
124+
hashVal, err := hashDatum(sc, value)
125+
if err != nil {
126+
return errors.Trace(err)
127+
}
128+
s.insertHashValue(hashVal)
129+
return nil
130+
}
131+
132+
// InsertRowValue inserts multi-column values to the sketch.
133+
func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error {
134+
hashVal, err := hashRow(sc, values)
135+
if err != nil {
136+
return errors.Trace(err)
137+
}
138+
s.insertHashValue(hashVal)
139+
return nil
140+
}
141+
142+
func hashDatum(sc *stmtctx.StatementContext, value types.Datum) (uint64, error) {
124143
bytes, err := codec.EncodeValue(sc.TimeZone(), nil, value)
125144
err = sc.HandleError(err)
126145
if err != nil {
127-
return errors.Trace(err)
146+
return 0, err
128147
}
129148
hashFunc := murmur3Pool.Get().(hash.Hash64)
130149
hashFunc.Reset()
131150
defer murmur3Pool.Put(hashFunc)
132151
_, err = hashFunc.Write(bytes)
133152
if err != nil {
134-
return errors.Trace(err)
153+
return 0, err
135154
}
136-
s.insertHashValue(hashFunc.Sum64())
137-
return nil
155+
return hashFunc.Sum64(), nil
138156
}
139157

140-
// InsertRowValue inserts multi-column values to the sketch.
141-
func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error {
158+
func hashRow(sc *stmtctx.StatementContext, values []types.Datum) (uint64, error) {
142159
b := make([]byte, 0, 8)
143160
hashFunc := murmur3Pool.Get().(hash.Hash64)
144161
hashFunc.Reset()
@@ -150,15 +167,14 @@ func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.D
150167
b, err := codec.EncodeValue(sc.TimeZone(), b, v)
151168
err = errCtx.HandleError(err)
152169
if err != nil {
153-
return err
170+
return 0, err
154171
}
155172
_, err = hashFunc.Write(b)
156173
if err != nil {
157-
return err
174+
return 0, err
158175
}
159176
}
160-
s.insertHashValue(hashFunc.Sum64())
161-
return nil
177+
return hashFunc.Sum64(), nil
162178
}
163179

164180
// MergeFMSketch merges two FM Sketch.

0 commit comments

Comments
 (0)