@@ -160,9 +160,9 @@ type ReservoirRowSampleItem struct {
160160// EmptyReservoirSampleItemSize = (24 + 16 + 8) now.
161161const EmptyReservoirSampleItemSize = int64 (unsafe .Sizeof (ReservoirRowSampleItem {}))
162162
163- // sketchSampleRate applies after a collector has gathered MaxFMSketchSize sketch rows.
163+ // DefaultNDVSampleRate is the sketch sampling rate used when RowSampleBuilder.NDVSampleRate is unset (<= 0); it applies after a collector has gathered MaxFMSketchSize sketch rows.
164164// The warm-up keeps small-table NDV behavior exact while bounding extra sketch work for large scans.
165- const sketchSampleRate = 0.01
165+ const DefaultNDVSampleRate = 0.01
166166
167167// MemUsage returns the memory usage of sample item.
168168func (i ReservoirRowSampleItem ) MemUsage () (sum int64 ) {
@@ -218,6 +218,7 @@ type RowSampleBuilder struct {
218218 ColGroups [][]int64
219219 MaxSampleSize int
220220 SampleRate float64
221+ NDVSampleRate float64
221222 MaxFMSketchSize int
222223}
223224
@@ -261,6 +262,11 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
261262 if minSketchSampleCount <= 0 {
262263 minSketchSampleCount = MaxSketchSize
263264 }
265+ ndvSampleRate := s .NDVSampleRate
266+ if ndvSampleRate <= 0 {
267+ ndvSampleRate = DefaultNDVSampleRate
268+ }
269+ ndvSampleRate = min (ndvSampleRate , 1 )
264270 ctx := context .TODO ()
265271 chk := s .RecordSet .NewChunk (nil )
266272 it := chunk .NewIterator4Chunk (chk )
@@ -300,7 +306,7 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
300306 datums [i ].SetBytes (encodedKey )
301307 }
302308 }
303- collectSketch := collector .Base ().SketchSampleCount < int64 (minSketchSampleCount ) || s .Rng .Float64 () < sketchSampleRate
309+ collectSketch := collector .Base ().SketchSampleCount < int64 (minSketchSampleCount ) || s .Rng .Float64 () < ndvSampleRate
304310 err := collector .Base ().collectColumns (s .Sc , datums , sizes , collectSketch )
305311 if err != nil {
306312 return nil , err
0 commit comments