cockroachdb · craig · Nov 25, 2025 · Nov 22, 2025 · Nov 22, 2025
@@ -151,10 +151,6 @@ const AutoStatsName = "__auto__"
 // automatically.
 const AutoPartialStatsName = "__auto_partial__"
 
-// ImportStatsName is the name to use for statistics created automatically
-// during import.
-const ImportStatsName = "__import__"
-
 // ForecastStatsName is the name to use for statistic forecasts.
 const ForecastStatsName = "__forecast__"
 

@@ -294,7 +294,7 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
 		virtColEnabled := statsOnVirtualCols.Get(n.p.ExecCfg().SV())
 		// Disable multi-column stats and deleting stats if partial statistics at
 		// the extremes are requested.
-		// TODO(faizaanmadhani): Add support for multi-column stats.
+		// TODO(#94076): add support for creating multi-column stats.
 		var multiColEnabled bool
 		if !n.Options.UsingExtremes {
 			multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(n.p.ExecCfg().SV())

@@ -137,14 +137,10 @@ func computeNumberSamples(ctx context.Context, numRows uint64, st *cluster.Setti
 		minSampleSize = minAutoHistogramSamples.Default()
 	}
 
-	numSamples := math.Max(
-		math.Min(
-			582.0*math.Pow(float64(numRows), 0.29),
-			float64(maxSampleSize),
-		),
+	return uint32(max(
+		min(582.0*math.Pow(float64(numRows), 0.29), float64(maxSampleSize)),
 		float64(minSampleSize),
-	)
-	return uint32(numSamples)
+	))
 }
 
 func (dsp *DistSQLPlanner) createAndAttachSamplers(
@@ -167,7 +163,6 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
 		if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok {
 			overhead = autoStatsFractionStaleRowsForTable
 		}
-		// Convert to a signed integer first to make the linter happy.
 		if details.UsingExtremes {
 			rowsExpected = uint64(int64(
 				// The total expected number of rows is the estimated number of stale
@@ -188,23 +183,19 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
 	sampler := &execinfrapb.SamplerSpec{
 		Sketches:         sketchSpec,
 		InvertedSketches: invSketchSpec,
+		MaxFractionIdle:  details.MaxFractionIdle,
 	}
-	sampler.MaxFractionIdle = details.MaxFractionIdle
-	// For partial statistics this loop should only iterate once
-	// since we only support one reqStat at a time.
+	// For partial statistics this loop should only iterate once since we only
+	// support one reqStat at a time.
 	for _, s := range reqStats {
 		if s.histogram {
 			var histogramSamplesCount uint32
 			if tableSampleCount, ok := desc.HistogramSamplesCount(); ok {
 				histogramSamplesCount = tableSampleCount
-			} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != histogramSamples.Default() {
+			} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != 0 {
 				histogramSamplesCount = uint32(clusterSampleCount)
 			} else {
-				histogramSamplesCount = computeNumberSamples(
-					ctx,
-					rowsExpected,
-					dsp.st,
-				)
+				histogramSamplesCount = computeNumberSamples(ctx, rowsExpected, dsp.st)
 				log.Dev.Infof(ctx, "using computed sample size of %d for histogram construction", histogramSamplesCount)
 			}
 			sampler.SampleSize = histogramSamplesCount
@@ -219,7 +210,7 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
 
 	// The sampler outputs the original columns plus a rank column, five
 	// sketch columns, and two inverted histogram columns.
-	outTypes := make([]*types.T, 0, len(p.GetResultTypes())+5)
+	outTypes := make([]*types.T, 0, len(p.GetResultTypes())+8)
 	outTypes = append(outTypes, p.GetResultTypes()...)
 	// An INT column for the rank of each row.
 	outTypes = append(outTypes, types.Int)
@@ -292,18 +283,15 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 	// so we only support one requested stat at a time here.
 	if len(reqStats) > 1 {
 		return nil, unimplemented.NewWithIssue(
-			128904,
-			"cannot process multiple partial statistics requests at once",
+			128904, "cannot process multiple partial statistics requests at once",
 		)
 	}
 
 	reqStat := reqStats[0]
-
 	if len(reqStat.columns) > 1 {
-		// TODO (faizaanmadhani): Add support for creating multi-column stats
+		// TODO(#94076): add support for creating multi-column stats.
 		return nil, pgerror.Newf(pgcode.FeatureNotSupported, "multi-column partial statistics are not currently supported")
 	}
-
 	if !reqStat.histogram {
 		return nil, pgerror.Newf(pgcode.FeatureNotSupported, "partial statistics without histograms are not supported")
 	}
@@ -324,9 +312,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 		return nil, err
 	}
 
-	// Calculate the column we need to scan
-	// TODO (faizaanmadhani): Iterate through all columns in a requested stat when
-	// when we add support for multi-column statistics.
+	// Calculate the column we need to scan.
+	// TODO(#94076): iterate through all columns in a requested stat when we add
+	// support for multi-column statistics.
 	var colCfg scanColumnsConfig
 	colCfg.wantedColumns = append(colCfg.wantedColumns, column.GetID())
 
@@ -341,19 +329,18 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 	if err != nil {
 		return nil, err
 	}
-	// Map the ColumnIDs to their ordinals in scan.cols
-	// This loop should only iterate once, since we only
-	// handle single column partial statistics.
-	// TODO(faizaanmadhani): Add support for multi-column partial stats next
+	// Map the ColumnIDs to their ordinals in scan.cols. This loop should only
+	// iterate once, since we only handle single column partial statistics.
+	// TODO(#94076): add support for creating multi-column stats.
 	var colIdxMap catalog.TableColMap
 	for i, c := range scan.catalogCols {
 		colIdxMap.Set(c.GetID(), i)
 	}
 
 	var stat *stats.TableStatistic
 	// Find the statistic from the newest table statistic for our column that is
-	// not partial and not forecasted. The first one we find will be the latest
-	// due to the newest to oldest ordering property of the cache.
+	// not partial, not merged, and not forecasted. The first one we find will
+	// be the latest due to the newest to oldest ordering property of the cache.
 	for _, t := range tableStats {
 		if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
 			!t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
@@ -380,7 +367,8 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 		return nil, pgerror.Newf(
 			pgcode.ObjectNotInPrerequisiteState,
 			"column %s does not have a prior statistic",
-			column.GetName())
+			column.GetName(),
+		)
 	}
 	if len(stat.Histogram) == 1 && stat.Histogram[0].UpperBound == tree.DNull {
 		return nil, pgerror.Newf(
@@ -398,8 +386,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 			planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
 		)
 
-		lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx,
-			planCtx.EvalContext(), stat.Histogram)
+		lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(
+			ctx, planCtx.EvalContext(), stat.Histogram,
+		)
 		if err != nil {
 			return nil, err
 		}
@@ -413,13 +402,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 		}
 		prevLowerBound = lowerBound
 
-		extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound,
-			upperBound, scan.index)
+		extremesSpans, err := bounds.ConstructUsingExtremesSpans(
+			lowerBound, upperBound, scan.index,
+		)
 		if err != nil {
 			return nil, err
 		}
 		predicate = bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
-		// Get roachpb.Spans from constraint.Spans
 		scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
 		if err != nil {
 			return nil, err
@@ -456,9 +445,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 		spec.FullStatisticID = stat.StatisticID
 	}
 
-	// For now, this loop should iterate only once, as we only
-	// handle single-column partial statistics.
-	// TODO(faizaanmadhani): Add support for multi-column partial stats next
+	// For now, this loop should iterate only once, as we only handle
+	// single-column partial statistics.
+	// TODO(#94076): add support for creating multi-column stats.
 	for i, colID := range reqStat.columns {
 		colIdx, ok := colIdxMap.Get(colID)
 		if !ok {
@@ -497,16 +486,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
 		sketchSpec = append(sketchSpec, spec)
 	}
 	return dsp.createAndAttachSamplers(
-		ctx,
-		p,
-		desc,
-		tableStats,
-		details,
-		sampledColumnIDs,
-		jobID,
-		reqStats,
-		sketchSpec, invSketchSpec,
-		numIndexes, curIndex), nil
+		ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
+		sketchSpec, invSketchSpec, numIndexes, curIndex,
+	), nil
 }
 
 func (dsp *DistSQLPlanner) createStatsPlan(
@@ -748,16 +730,9 @@ func (dsp *DistSQLPlanner) createStatsPlan(
 	}
 
 	return dsp.createAndAttachSamplers(
-		ctx,
-		p,
-		desc,
-		tableStats,
-		details,
-		sampledColumnIDs,
-		jobID,
-		reqStats,
-		sketchSpecs, invSketchSpecs,
-		numIndexes, curIndex), nil
+		ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
+		sketchSpecs, invSketchSpecs, numIndexes, curIndex,
+	), nil
 }
 
 // createPlanForCreateStats creates the DistSQL plan to perform the table stats

diff --git a/pkg/sql/logictest/testdata/logic_test/constrained_stats b/pkg/sql/logictest/testdata/logic_test/constrained_stats
@@ -2,9 +2,6 @@
 
 # Tests for creating partial table statistics with a WHERE clause.
 
-statement ok
-SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
-
 statement ok
 CREATE TABLE products (
     id INT PRIMARY KEY,

diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_automatic_partial_stats b/pkg/sql/logictest/testdata/logic_test/distsql_automatic_partial_stats
@@ -2,10 +2,6 @@
 
 # Test a simple update and insert case for partial statistics
 
-# Disable automatic stats
-statement ok
-SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
-
 statement ok
 SET CLUSTER SETTING sql.stats.automatic_partial_collection.min_stale_rows = 5
 

@@ -13,6 +13,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
+	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
 	"github.com/cockroachdb/cockroach/pkg/util/encoding"
 	"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
 	"github.com/cockroachdb/errors"
@@ -340,3 +341,20 @@ func MaybeMarkRedactable(unsafe string, markRedactable bool) string {
 	}
 	return unsafe
 }
+
+// FindLatestFullStat returns the index, for use with tab.Statistic(), of the
+// most recent full statistic that can be used for planning. If no such
+// statistic exists (either there are no full stats at all, or the session
+// variables disallow the ones that are present, i.e. merged stats or
+// forecasts), tab.StatisticCount() is returned.
+func FindLatestFullStat(tab Table, sd *sessiondata.SessionData) int {
+	// Stats are ordered with most recent first, so the first usable one wins.
+	var first int
+	for first < tab.StatisticCount() &&
+		(tab.Statistic(first).IsPartial() ||
+			(tab.Statistic(first).IsMerged() && !sd.OptimizerUseMergedPartialStatistics) ||
+			(tab.Statistic(first).IsForecast() && !sd.OptimizerUseForecasts)) {
+		first++
+	}
+	return first
+}
@@ -410,34 +410,24 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
 		}
 		if scan, ok := e.(*memo.ScanExpr); ok {
 			tab := b.mem.Metadata().Table(scan.Table)
-			if tab.StatisticCount() > 0 {
-				// The first stat is the most recent full one.
-				var first int
-				for first < tab.StatisticCount() &&
-					(tab.Statistic(first).IsPartial() ||
-						(tab.Statistic(first).IsMerged() && !b.evalCtx.SessionData().OptimizerUseMergedPartialStatistics) ||
-						(tab.Statistic(first).IsForecast() && !b.evalCtx.SessionData().OptimizerUseForecasts)) {
-					first++
+			first := cat.FindLatestFullStat(tab, b.evalCtx.SessionData())
+			if first < tab.StatisticCount() {
+				stat := tab.Statistic(first)
+				val.TableStatsRowCount = stat.RowCount()
+				if val.TableStatsRowCount == 0 {
+					val.TableStatsRowCount = 1
 				}
-
-				if first < tab.StatisticCount() {
-					stat := tab.Statistic(first)
-					val.TableStatsRowCount = stat.RowCount()
-					if val.TableStatsRowCount == 0 {
-						val.TableStatsRowCount = 1
-					}
-					val.TableStatsCreatedAt = stat.CreatedAt()
-					val.LimitHint = scan.RequiredPhysical().LimitHint
-					val.Forecast = stat.IsForecast()
-					if val.Forecast {
-						val.ForecastAt = stat.CreatedAt()
-						// Find the first non-forecast full stat.
-						for i := first + 1; i < tab.StatisticCount(); i++ {
-							nextStat := tab.Statistic(i)
-							if !nextStat.IsPartial() && !nextStat.IsForecast() {
-								val.TableStatsCreatedAt = nextStat.CreatedAt()
-								break
-							}
+				val.TableStatsCreatedAt = stat.CreatedAt()
+				val.LimitHint = scan.RequiredPhysical().LimitHint
+				val.Forecast = stat.IsForecast()
+				if val.Forecast {
+					val.ForecastAt = stat.CreatedAt()
+					// Find the first non-forecast full stat.
+					for i := first + 1; i < tab.StatisticCount(); i++ {
+						nextStat := tab.Statistic(i)
+						if !nextStat.IsPartial() && !nextStat.IsForecast() {
+							val.TableStatsCreatedAt = nextStat.CreatedAt()
+							break
 						}
 					}
 				}
@@ -898,26 +888,23 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
 		b.TotalScanRows += stats.RowCount
 		b.ScanCounts[exec.ScanWithStatsCount]++
 
-		// The first stat is the most recent full one. Check if it was a forecast.
-		var first int
-		for first < tab.StatisticCount() && tab.Statistic(first).IsPartial() {
-			first++
-		}
+		sd := b.evalCtx.SessionData()
+		first := cat.FindLatestFullStat(tab, sd)
 		if first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
-			if b.evalCtx.SessionData().OptimizerUseForecasts {
-				b.ScanCounts[exec.ScanWithStatsForecastCount]++
+			b.ScanCounts[exec.ScanWithStatsForecastCount]++
 
-				// Calculate time since the forecast (or negative time until the forecast).
-				nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
-				if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
-					b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
-				}
-			}
-			// Find the first non-forecast full stat.
-			for first < tab.StatisticCount() &&
-				(tab.Statistic(first).IsPartial() || tab.Statistic(first).IsForecast()) {
-				first++
+			// Calculate time since the forecast (or negative time until the forecast).
+			nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
+			if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
+				b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
 			}
+
+			// 'first' currently points at a forecast, which means that forecast
+			// usage is enabled. To find the latest full non-forecast stat, we
+			// temporarily disable forecast usage for the lookup below.
+			sd.OptimizerUseForecasts = false
+			first = cat.FindLatestFullStat(tab, sd)
+			sd.OptimizerUseForecasts = true
 		}
 
 		if first < tab.StatisticCount() {
@@ -945,8 +932,9 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
 	}
 
 	var params exec.ScanParams
-	params, outputCols, err = b.scanParams(tab, &scan.ScanPrivate,
-		scan.Relational(), scan.RequiredPhysical(), statsCreatedAt)
+	params, outputCols, err = b.scanParams(
+		tab, &scan.ScanPrivate, scan.Relational(), scan.RequiredPhysical(), statsCreatedAt,
+	)
 	if err != nil {
 		return execPlan{}, colOrdMap{}, err
 	}