Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pkg/jobs/jobspb/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,6 @@ const AutoStatsName = "__auto__"
// automatically.
const AutoPartialStatsName = "__auto_partial__"

// ImportStatsName is the name to use for statistics created automatically
// during import.
const ImportStatsName = "__import__"

// ForecastStatsName is the name to use for statistic forecasts.
const ForecastStatsName = "__forecast__"

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
virtColEnabled := statsOnVirtualCols.Get(n.p.ExecCfg().SV())
// Disable multi-column stats and deleting stats if partial statistics at
// the extremes are requested.
// TODO(faizaanmadhani): Add support for multi-column stats.
// TODO(#94076): add support for creating multi-column stats.
var multiColEnabled bool
if !n.Options.UsingExtremes {
multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(n.p.ExecCfg().SV())
Expand Down
97 changes: 36 additions & 61 deletions pkg/sql/distsql_plan_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,10 @@ func computeNumberSamples(ctx context.Context, numRows uint64, st *cluster.Setti
minSampleSize = minAutoHistogramSamples.Default()
}

numSamples := math.Max(
math.Min(
582.0*math.Pow(float64(numRows), 0.29),
float64(maxSampleSize),
),
return uint32(max(
min(582.0*math.Pow(float64(numRows), 0.29), float64(maxSampleSize)),
float64(minSampleSize),
)
return uint32(numSamples)
))
}

func (dsp *DistSQLPlanner) createAndAttachSamplers(
Expand All @@ -167,7 +163,6 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
if autoStatsFractionStaleRowsForTable, ok := desc.AutoStatsFractionStaleRows(); ok {
overhead = autoStatsFractionStaleRowsForTable
}
// Convert to a signed integer first to make the linter happy.
if details.UsingExtremes {
rowsExpected = uint64(int64(
// The total expected number of rows is the estimated number of stale
Expand All @@ -188,23 +183,19 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(
sampler := &execinfrapb.SamplerSpec{
Sketches: sketchSpec,
InvertedSketches: invSketchSpec,
MaxFractionIdle: details.MaxFractionIdle,
}
sampler.MaxFractionIdle = details.MaxFractionIdle
// For partial statistics this loop should only iterate once
// since we only support one reqStat at a time.
// For partial statistics this loop should only iterate once since we only
// support one reqStat at a time.
for _, s := range reqStats {
if s.histogram {
var histogramSamplesCount uint32
if tableSampleCount, ok := desc.HistogramSamplesCount(); ok {
histogramSamplesCount = tableSampleCount
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != histogramSamples.Default() {
} else if clusterSampleCount := histogramSamples.Get(&dsp.st.SV); clusterSampleCount != 0 {
histogramSamplesCount = uint32(clusterSampleCount)
} else {
histogramSamplesCount = computeNumberSamples(
ctx,
rowsExpected,
dsp.st,
)
histogramSamplesCount = computeNumberSamples(ctx, rowsExpected, dsp.st)
log.Dev.Infof(ctx, "using computed sample size of %d for histogram construction", histogramSamplesCount)
}
sampler.SampleSize = histogramSamplesCount
Expand All @@ -219,7 +210,7 @@ func (dsp *DistSQLPlanner) createAndAttachSamplers(

// The sampler outputs the original columns plus a rank column, five
// sketch columns, and two inverted histogram columns.
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+5)
outTypes := make([]*types.T, 0, len(p.GetResultTypes())+8)
outTypes = append(outTypes, p.GetResultTypes()...)
// An INT column for the rank of each row.
outTypes = append(outTypes, types.Int)
Expand Down Expand Up @@ -292,18 +283,15 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
// so we only support one requested stat at a time here.
if len(reqStats) > 1 {
return nil, unimplemented.NewWithIssue(
128904,
"cannot process multiple partial statistics requests at once",
128904, "cannot process multiple partial statistics requests at once",
)
}

reqStat := reqStats[0]

if len(reqStat.columns) > 1 {
// TODO (faizaanmadhani): Add support for creating multi-column stats
// TODO(#94076): add support for creating multi-column stats.
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "multi-column partial statistics are not currently supported")
}

if !reqStat.histogram {
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "partial statistics without histograms are not supported")
}
Expand All @@ -324,9 +312,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
return nil, err
}

// Calculate the column we need to scan
// TODO (faizaanmadhani): Iterate through all columns in a requested stat when
// when we add support for multi-column statistics.
// Calculate the column we need to scan.
// TODO(#94076): iterate through all columns in a requested stat when we add
// support for multi-column statistics.
var colCfg scanColumnsConfig
colCfg.wantedColumns = append(colCfg.wantedColumns, column.GetID())

Expand All @@ -341,19 +329,18 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
if err != nil {
return nil, err
}
// Map the ColumnIDs to their ordinals in scan.cols
// This loop should only iterate once, since we only
// handle single column partial statistics.
// TODO(faizaanmadhani): Add support for multi-column partial stats next
// Map the ColumnIDs to their ordinals in scan.cols. This loop should only
// iterate once, since we only handle single column partial statistics.
// TODO(#94076): add support for creating multi-column stats.
var colIdxMap catalog.TableColMap
for i, c := range scan.catalogCols {
colIdxMap.Set(c.GetID(), i)
}

var stat *stats.TableStatistic
// Find the statistic from the newest table statistic for our column that is
// not partial and not forecasted. The first one we find will be the latest
// due to the newest to oldest ordering property of the cache.
// not partial, not merged, and not forecasted. The first one we find will
// be the latest due to the newest to oldest ordering property of the cache.
for _, t := range tableStats {
if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
!t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
Expand All @@ -380,7 +367,8 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
return nil, pgerror.Newf(
pgcode.ObjectNotInPrerequisiteState,
"column %s does not have a prior statistic",
column.GetName())
column.GetName(),
)
}
if len(stat.Histogram) == 1 && stat.Histogram[0].UpperBound == tree.DNull {
return nil, pgerror.Newf(
Expand All @@ -398,8 +386,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
)

lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx,
planCtx.EvalContext(), stat.Histogram)
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(
ctx, planCtx.EvalContext(), stat.Histogram,
)
if err != nil {
return nil, err
}
Expand All @@ -413,13 +402,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
}
prevLowerBound = lowerBound

extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound,
upperBound, scan.index)
extremesSpans, err := bounds.ConstructUsingExtremesSpans(
lowerBound, upperBound, scan.index,
)
if err != nil {
return nil, err
}
predicate = bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
// Get roachpb.Spans from constraint.Spans
scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
if err != nil {
return nil, err
Expand Down Expand Up @@ -456,9 +445,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
spec.FullStatisticID = stat.StatisticID
}

// For now, this loop should iterate only once, as we only
// handle single-column partial statistics.
// TODO(faizaanmadhani): Add support for multi-column partial stats next
// For now, this loop should iterate only once, as we only handle
// single-column partial statistics.
// TODO(#94076): add support for creating multi-column stats.
for i, colID := range reqStat.columns {
colIdx, ok := colIdxMap.Get(colID)
if !ok {
Expand Down Expand Up @@ -497,16 +486,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
sketchSpec = append(sketchSpec, spec)
}
return dsp.createAndAttachSamplers(
ctx,
p,
desc,
tableStats,
details,
sampledColumnIDs,
jobID,
reqStats,
sketchSpec, invSketchSpec,
numIndexes, curIndex), nil
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
sketchSpec, invSketchSpec, numIndexes, curIndex,
), nil
}

func (dsp *DistSQLPlanner) createStatsPlan(
Expand Down Expand Up @@ -748,16 +730,9 @@ func (dsp *DistSQLPlanner) createStatsPlan(
}

return dsp.createAndAttachSamplers(
ctx,
p,
desc,
tableStats,
details,
sampledColumnIDs,
jobID,
reqStats,
sketchSpecs, invSketchSpecs,
numIndexes, curIndex), nil
ctx, p, desc, tableStats, details, sampledColumnIDs, jobID, reqStats,
sketchSpecs, invSketchSpecs, numIndexes, curIndex,
), nil
}

// createPlanForCreateStats creates the DistSQL plan to perform the table stats
Expand Down
3 changes: 0 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/constrained_stats
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

# Tests for creating partial table statistics with a WHERE clause.

statement ok
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false

statement ok
CREATE TABLE products (
id INT PRIMARY KEY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

# Test a simple update and insert case for partial statistics

# Disable automatic stats
statement ok
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false

statement ok
SET CLUSTER SETTING sql.stats.automatic_partial_collection.min_stale_rows = 5

Expand Down
18 changes: 18 additions & 0 deletions pkg/sql/opt/cat/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -340,3 +341,20 @@ func MaybeMarkRedactable(unsafe string, markRedactable bool) string {
}
return unsafe
}

// FindLatestFullStat returns the index, suitable for passing to
// tab.Statistic(), of the most recent full statistic that may be used for
// planning.
//
// A statistic is skipped when it is partial, when it is merged and the session
// has OptimizerUseMergedPartialStatistics disabled, or when it is a forecast
// and the session has OptimizerUseForecasts disabled. If no usable statistic
// exists (either the table has no full stats at all, or the session variables
// rule out every one that is present), tab.StatisticCount() is returned.
func FindLatestFullStat(tab Table, sd *sessiondata.SessionData) int {
	// Stats are ordered with most recent first, so the first acceptable entry
	// encountered is also the latest one.
	idx := 0
	for ; idx < tab.StatisticCount(); idx++ {
		stat := tab.Statistic(idx)
		if stat.IsPartial() {
			continue
		}
		if stat.IsMerged() && !sd.OptimizerUseMergedPartialStatistics {
			continue
		}
		if stat.IsForecast() && !sd.OptimizerUseForecasts {
			continue
		}
		break
	}
	return idx
}
80 changes: 34 additions & 46 deletions pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,34 +410,24 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
}
if scan, ok := e.(*memo.ScanExpr); ok {
tab := b.mem.Metadata().Table(scan.Table)
if tab.StatisticCount() > 0 {
// The first stat is the most recent full one.
var first int
for first < tab.StatisticCount() &&
(tab.Statistic(first).IsPartial() ||
(tab.Statistic(first).IsMerged() && !b.evalCtx.SessionData().OptimizerUseMergedPartialStatistics) ||
(tab.Statistic(first).IsForecast() && !b.evalCtx.SessionData().OptimizerUseForecasts)) {
first++
first := cat.FindLatestFullStat(tab, b.evalCtx.SessionData())
if first < tab.StatisticCount() {
stat := tab.Statistic(first)
val.TableStatsRowCount = stat.RowCount()
if val.TableStatsRowCount == 0 {
val.TableStatsRowCount = 1
}

if first < tab.StatisticCount() {
stat := tab.Statistic(first)
val.TableStatsRowCount = stat.RowCount()
if val.TableStatsRowCount == 0 {
val.TableStatsRowCount = 1
}
val.TableStatsCreatedAt = stat.CreatedAt()
val.LimitHint = scan.RequiredPhysical().LimitHint
val.Forecast = stat.IsForecast()
if val.Forecast {
val.ForecastAt = stat.CreatedAt()
// Find the first non-forecast full stat.
for i := first + 1; i < tab.StatisticCount(); i++ {
nextStat := tab.Statistic(i)
if !nextStat.IsPartial() && !nextStat.IsForecast() {
val.TableStatsCreatedAt = nextStat.CreatedAt()
break
}
val.TableStatsCreatedAt = stat.CreatedAt()
val.LimitHint = scan.RequiredPhysical().LimitHint
val.Forecast = stat.IsForecast()
if val.Forecast {
val.ForecastAt = stat.CreatedAt()
// Find the first non-forecast full stat.
for i := first + 1; i < tab.StatisticCount(); i++ {
nextStat := tab.Statistic(i)
if !nextStat.IsPartial() && !nextStat.IsForecast() {
val.TableStatsCreatedAt = nextStat.CreatedAt()
break
}
}
}
Expand Down Expand Up @@ -898,26 +888,23 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
b.TotalScanRows += stats.RowCount
b.ScanCounts[exec.ScanWithStatsCount]++

// The first stat is the most recent full one. Check if it was a forecast.
var first int
for first < tab.StatisticCount() && tab.Statistic(first).IsPartial() {
first++
}
sd := b.evalCtx.SessionData()
first := cat.FindLatestFullStat(tab, sd)
if first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
if b.evalCtx.SessionData().OptimizerUseForecasts {
b.ScanCounts[exec.ScanWithStatsForecastCount]++
b.ScanCounts[exec.ScanWithStatsForecastCount]++

// Calculate time since the forecast (or negative time until the forecast).
nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
}
}
// Find the first non-forecast full stat.
for first < tab.StatisticCount() &&
(tab.Statistic(first).IsPartial() || tab.Statistic(first).IsForecast()) {
first++
// Calculate time since the forecast (or negative time until the forecast).
nanosSinceStatsForecasted := timeutil.Since(tab.Statistic(first).CreatedAt())
if nanosSinceStatsForecasted.Abs() > b.NanosSinceStatsForecasted.Abs() {
b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
}

// Since currently 'first' points at the forecast, then usage of the
// forecasts must be enabled, so in order to find the first full
// non-forecast stat, we'll temporarily disable their usage.
sd.OptimizerUseForecasts = false
first = cat.FindLatestFullStat(tab, sd)
sd.OptimizerUseForecasts = true
}

if first < tab.StatisticCount() {
Expand Down Expand Up @@ -945,8 +932,9 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
}

var params exec.ScanParams
params, outputCols, err = b.scanParams(tab, &scan.ScanPrivate,
scan.Relational(), scan.RequiredPhysical(), statsCreatedAt)
params, outputCols, err = b.scanParams(
tab, &scan.ScanPrivate, scan.Relational(), scan.RequiredPhysical(), statsCreatedAt,
)
if err != nil {
return execPlan{}, colOrdMap{}, err
}
Expand Down
Loading
Loading