Skip to content

Commit 28ba9c3

Browse files
committed
statistics: use analyze NDV rate proto
1 parent a51b267 commit 28ba9c3

3 files changed

Lines changed: 12 additions & 3 deletions

File tree

pkg/executor/builder.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,6 +3169,7 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(
31693169
modifyCount = int64(val.(int))
31703170
})
31713171
sampleRate := new(float64)
3172+
ndvRate := statistics.DefaultNDVSampleRate
31723173
var sampleRateReason string
31733174
if opts[ast.AnalyzeOptNumSamples] == 0 {
31743175
*sampleRate = math.Float64frombits(opts[ast.AnalyzeOptSampleRate])
@@ -3229,6 +3230,7 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(
32293230
BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
32303231
SampleSize: int64(opts[ast.AnalyzeOptNumSamples]),
32313232
SampleRate: sampleRate,
3233+
NdvRate: &ndvRate,
32323234
SketchSize: statistics.MaxSketchSize,
32333235
ColumnsInfo: util.ColumnsToProto(task.ColsInfo, task.TblInfo.PKIsHandle, false, false),
32343236
ColumnGroups: colGroups,

pkg/statistics/row_sampler.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,9 @@ type ReservoirRowSampleItem struct {
160160
// EmptyReservoirSampleItemSize = (24 + 16 + 8) now.
161161
const EmptyReservoirSampleItemSize = int64(unsafe.Sizeof(ReservoirRowSampleItem{}))
162162

163-
// sketchSampleRate applies after a collector has gathered MaxFMSketchSize sketch rows.
163+
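// DefaultNDVSampleRate is the fallback NDV sketch sample rate, used when RowSampleBuilder.NDVSampleRate is unset (<= 0); the rate applies after a collector has gathered MaxFMSketchSize sketch rows.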
164164
// The warm-up keeps small-table NDV behavior exact while bounding extra sketch work for large scans.
165-
const sketchSampleRate = 0.01
165+
const DefaultNDVSampleRate = 0.01
166166

167167
// MemUsage returns the memory usage of sample item.
168168
func (i ReservoirRowSampleItem) MemUsage() (sum int64) {
@@ -218,6 +218,7 @@ type RowSampleBuilder struct {
218218
ColGroups [][]int64
219219
MaxSampleSize int
220220
SampleRate float64
221+
NDVSampleRate float64
221222
MaxFMSketchSize int
222223
}
223224

@@ -261,6 +262,11 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
261262
if minSketchSampleCount <= 0 {
262263
minSketchSampleCount = MaxSketchSize
263264
}
265+
ndvSampleRate := s.NDVSampleRate
266+
if ndvSampleRate <= 0 {
267+
ndvSampleRate = DefaultNDVSampleRate
268+
}
269+
ndvSampleRate = min(ndvSampleRate, 1)
264270
ctx := context.TODO()
265271
chk := s.RecordSet.NewChunk(nil)
266272
it := chunk.NewIterator4Chunk(chk)
@@ -300,7 +306,7 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
300306
datums[i].SetBytes(encodedKey)
301307
}
302308
}
303-
collectSketch := collector.Base().SketchSampleCount < int64(minSketchSampleCount) || s.Rng.Float64() < sketchSampleRate
309+
collectSketch := collector.Base().SketchSampleCount < int64(minSketchSampleCount) || s.Rng.Float64() < ndvSampleRate
304310
err := collector.Base().collectColumns(s.Sc, datums, sizes, collectSketch)
305311
if err != nil {
306312
return nil, err

pkg/store/mockstore/unistore/cophandler/analyze.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,7 @@ func handleAnalyzeFullSamplingReq(
457457
MaxSampleSize: int(colReq.SampleSize),
458458
MaxFMSketchSize: int(colReq.SketchSize),
459459
SampleRate: colReq.GetSampleRate(),
460+
NDVSampleRate: colReq.GetNdvRate(),
460461
Rng: rand.New(rand.NewSource(time.Now().UnixNano())),
461462
}
462463
collector, err := builder.Collect()

0 commit comments

Comments
 (0)