Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -11354,6 +11354,39 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "sample_values_probability",
"required": false,
"desc": "Probability of sampling a label value for regex selectivity estimation (0.0-1.0). Sampled values are held in memory in addition to series. Set to 0 to disable; regexes will be estimated to select 10% of values.",
"fieldValue": null,
"fieldDefaultValue": 0.01,
"fieldFlag": "blocks-storage.tsdb.index-lookup-planning.sample-values-probability",
"fieldType": "float",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "sample_values_max_count",
"required": false,
"desc": "Maximum number of sampled values to store per label name. Set to 0 to disable; regexes will be estimated to select 10% of values.",
"fieldValue": null,
"fieldDefaultValue": 1024,
"fieldFlag": "blocks-storage.tsdb.index-lookup-planning.sample-values-max-count",
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "sample_values_max_bytes",
"required": false,
"desc": "Maximum total size in bytes of sampled values per label name. Set to 0 to disable; regexes will be estimated to select 10% of values.",
"fieldValue": null,
"fieldDefaultValue": 65536,
"fieldFlag": "blocks-storage.tsdb.index-lookup-planning.sample-values-max-bytes",
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "index_lookup_planning_enabled",
Expand Down
6 changes: 6 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,12 @@ Usage of ./cmd/mimir/mimir:
Cost for retrieving the posting list from disk or from memory. (default 10)
-blocks-storage.tsdb.index-lookup-planning.retrieved-series-cost float
Cost for retrieving series from the index and checking if a series belongs to the query's shard. (default 15)
-blocks-storage.tsdb.index-lookup-planning.sample-values-max-bytes int
Maximum total size in bytes of sampled values per label name. Set to 0 to disable; regexes will be estimated to select 10% of values. (default 65536)
-blocks-storage.tsdb.index-lookup-planning.sample-values-max-count int
Maximum number of sampled values to store per label name. Set to 0 to disable; regexes will be estimated to select 10% of values. (default 1024)
-blocks-storage.tsdb.index-lookup-planning.sample-values-probability float
Probability of sampling a label value for regex selectivity estimation (0.0-1.0). Sampled values are held in memory in addition to series. Set to 0 to disable; regexes will be estimated to select 10% of values. (default 0.01)
-blocks-storage.tsdb.index-lookup-planning.statistics-collection-frequency duration
[experimental] How frequently to collect block statistics, which are used in query execution optimization. 0 to disable. (default 1h0m0s)
-blocks-storage.tsdb.memory-snapshot-on-shutdown
Expand Down
17 changes: 17 additions & 0 deletions docs/sources/mimir/configure/configuration-parameters/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -5257,6 +5257,23 @@ tsdb:
# CLI flag: -blocks-storage.tsdb.index-lookup-planning.label-cardinality-for-smaller-sketch
[label_cardinality_for_smaller_sketch: <int> | default = 1000]

# (advanced) Probability of sampling a label value for regex selectivity
# estimation (0.0-1.0). Sampled values are held in memory in addition to
# series. Set to 0 to disable; regexes will be estimated to select 10% of
# values.
# CLI flag: -blocks-storage.tsdb.index-lookup-planning.sample-values-probability
[sample_values_probability: <float> | default = 0.01]

# (advanced) Maximum number of sampled values to store per label name. Set
# to 0 to disable; regexes will be estimated to select 10% of values.
# CLI flag: -blocks-storage.tsdb.index-lookup-planning.sample-values-max-count
[sample_values_max_count: <int> | default = 1024]

# (advanced) Maximum total size in bytes of sampled values per label name.
# Set to 0 to disable; regexes will be estimated to select 10% of values.
# CLI flag: -blocks-storage.tsdb.index-lookup-planning.sample-values-max-bytes
[sample_values_max_bytes: <int> | default = 65536]

# (experimental) Controls the collection of statistics and whether to defer
# some vector selector matchers to sequential scans. This leads to better
# performance.
Expand Down
3 changes: 3 additions & 0 deletions operations/mimir/mimir-flags-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,9 @@
"blocks-storage.tsdb.index-lookup-planning.min-series-per-block-for-query-planning": 10000,
"blocks-storage.tsdb.index-lookup-planning.label-cardinality-for-larger-sketch": 1000000,
"blocks-storage.tsdb.index-lookup-planning.label-cardinality-for-smaller-sketch": 1000,
"blocks-storage.tsdb.index-lookup-planning.sample-values-probability": 0.01,
"blocks-storage.tsdb.index-lookup-planning.sample-values-max-count": 1024,
"blocks-storage.tsdb.index-lookup-planning.sample-values-max-bytes": 65536,
"blocks-storage.tsdb.index-lookup-planning.comparison-portion": 0,
"blocks-storage.tsdb.index-lookup-planning.statistics-collection-frequency": 3600000000000,
"compactor.block-ranges": [],
Expand Down
3 changes: 1 addition & 2 deletions pkg/ingester/lookupplan/branch_and_bound.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"math"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/tsdb/index"

"github.com/grafana/mimir/pkg/storage/sharding"
)
Expand Down Expand Up @@ -64,7 +63,7 @@ func (pq plans) Iterator() iter.Seq[plan] {

// generatePlansBranchAndBound uses branch-and-bound to explore the space of possible plans.
// It prunes branches that cannot possibly lead to a better plan than the current best.
func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, statistics index.Statistics, matchers []*labels.Matcher, pools *costBasedPlannerPools, shard *sharding.ShardSelector) iter.Seq[plan] {
func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, statistics Statistics, matchers []*labels.Matcher, pools *costBasedPlannerPools, shard *sharding.ShardSelector) iter.Seq[plan] {
// Initialize priority queue with the root plan (all predicates undecided)
prospectPlans := pools.GetPlans(maxPlansForPlanning)

Expand Down
22 changes: 22 additions & 0 deletions pkg/ingester/lookupplan/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@ const (
DefaultMinSeriesPerBlockForQueryPlanning = 10_000
DefaultLabelCardinalityForLargerSketch = 1e6
DefaultLabelCardinalityForSmallerSketch = 1e3
DefaultSampleValuesProbability = 0.01 // 1% sample rate
DefaultSampleValuesMaxCount = 1024
DefaultSampleValuesMaxBytes = 64 * 1024 // 64KB
)

// defaultCostConfig is a CostConfig whose listed fields are filled in from the
// package-level Default* constants.
// NOTE(review): LabelCardinalityForLargerSketch and LabelCardinalityForSmallerSketch
// are not set in this literal (so they stay at their zero value here) even though
// RegisterFlags registers non-zero defaults for them — confirm this is intentional.
var defaultCostConfig = CostConfig{
RetrievedPostingCost: DefaultRetrievedPostingCost,
RetrievedSeriesCost: DefaultRetrievedSeriesCost,
RetrievedPostingListCost: DefaultRetrievedPostingListCost,
MinSeriesPerBlockForQueryPlanning: DefaultMinSeriesPerBlockForQueryPlanning,
// Sampling limits used for regex selectivity estimation.
SampleValuesProbability: DefaultSampleValuesProbability,
SampleValuesMaxCount: DefaultSampleValuesMaxCount,
SampleValuesMaxBytes: DefaultSampleValuesMaxBytes,
}

type CostConfig struct {
Expand All @@ -43,6 +49,19 @@ type CostConfig struct {

// LabelCardinalityForSmallerSketch is the number of series with a label for that label name to be allocated a smaller count-min sketch.
LabelCardinalityForSmallerSketch uint64 `yaml:"label_cardinality_for_smaller_sketch" category:"advanced"`

// SampleValuesProbability is the probability of sampling a label value for selectivity estimation.
// Sampled values are held in memory in addition to the in-memory storage of series.
// Set to 0 to disable sampling; regular expressions will be estimated to select 10% of all values of a label.
SampleValuesProbability float64 `yaml:"sample_values_probability" category:"advanced"`

// SampleValuesMaxCount is the maximum number of sampled values to store per label name.
// Set to 0 to disable sampling; regular expressions will be estimated to select 10% of all values of a label.
SampleValuesMaxCount int `yaml:"sample_values_max_count" category:"advanced"`

// SampleValuesMaxBytes is the maximum total size in bytes of sampled values per label name.
// Set to 0 to disable sampling; regular expressions will be estimated to select 10% of all values of a label.
SampleValuesMaxBytes int `yaml:"sample_values_max_bytes" category:"advanced"`
}

func (cfg *CostConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
Expand All @@ -52,6 +71,9 @@ func (cfg *CostConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
f.Uint64Var(&cfg.MinSeriesPerBlockForQueryPlanning, prefix+"min-series-per-block-for-query-planning", DefaultMinSeriesPerBlockForQueryPlanning, "Minimum number of series a block must have for query planning to be used.")
f.Uint64Var(&cfg.LabelCardinalityForLargerSketch, prefix+"label-cardinality-for-larger-sketch", DefaultLabelCardinalityForLargerSketch, "Number of series for a label name above which larger count-min sketches are used for that label.")
f.Uint64Var(&cfg.LabelCardinalityForSmallerSketch, prefix+"label-cardinality-for-smaller-sketch", DefaultLabelCardinalityForSmallerSketch, "Number of series for a label name above which smaller count-min sketches are used for that label.")
f.Float64Var(&cfg.SampleValuesProbability, prefix+"sample-values-probability", DefaultSampleValuesProbability, "Probability of sampling a label value for regex selectivity estimation (0.0-1.0). Sampled values are held in memory in addition to series. Set to 0 to disable; regexes will be estimated to select 10% of values.")
f.IntVar(&cfg.SampleValuesMaxCount, prefix+"sample-values-max-count", DefaultSampleValuesMaxCount, "Maximum number of sampled values to store per label name. Set to 0 to disable; regexes will be estimated to select 10% of values.")
f.IntVar(&cfg.SampleValuesMaxBytes, prefix+"sample-values-max-bytes", DefaultSampleValuesMaxBytes, "Maximum total size in bytes of sampled values per label name. Set to 0 to disable; regexes will be estimated to select 10% of values.")
}

func (cfg *CostConfig) Validate() error {
Expand Down
20 changes: 19 additions & 1 deletion pkg/ingester/lookupplan/mock_statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"fmt"
)

// mockStatistics implements the index.Statistics interface with hardcoded data for testing
// mockStatistics implements the Statistics interface with hardcoded data for testing
type mockStatistics struct {
// seriesPerValue maps label name -> label value -> number of series
seriesPerValue map[string]map[string]uint64
Expand Down Expand Up @@ -93,6 +93,24 @@ func (m *mockStatistics) LabelValuesCardinality(_ context.Context, name string,
return total
}

// SampleValues returns a subset of the values recorded in the mock for the given
// label name. The subset holds DefaultSampleValuesProbability of the label's
// values (rounded down), but never fewer than one value. It returns nil when the
// mock has no values for the label name.
//
// Values are picked by ranging over a map, so which values end up in the sample
// (and their order) is not fixed between calls.
func (m *mockStatistics) SampleValues(_ context.Context, name string) []string {
	valuesForLabel := m.seriesPerValue[name]
	available := len(valuesForLabel)
	if available == 0 {
		return nil
	}

	// Scale the number of known values by the default sampling rate, keeping at least one.
	wanted := int(DefaultSampleValuesProbability * float64(available))
	if wanted < 1 {
		wanted = 1
	}

	picked := make([]string, 0, wanted)
	for candidate := range valuesForLabel {
		picked = append(picked, candidate)
		if len(picked) >= wanted {
			break
		}
	}
	return picked
}

// newHighCardinalityMockStatistics creates a mockStatistics with higher cardinality
// to test the planner's behavior with realistic scale data
func newHighCardinalityMockStatistics() *mockStatistics {
Expand Down
5 changes: 2 additions & 3 deletions pkg/ingester/lookupplan/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"slices"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/tsdb/index"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

Expand Down Expand Up @@ -38,7 +37,7 @@ type plan struct {
}

// newScanOnlyPlan returns a plan in which all predicates would be used to scan and none to reach from the index.
func newScanOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
func newScanOnlyPlan(ctx context.Context, stats Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
p := plan{
predicates: make([]planPredicate, 0, len(matchers)),
indexPredicate: make([]bool, 0, len(matchers)),
Expand All @@ -58,7 +57,7 @@ func newScanOnlyPlan(ctx context.Context, stats index.Statistics, config CostCon
return p
}

func newIndexOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
func newIndexOnlyPlan(ctx context.Context, stats Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
p := newScanOnlyPlan(ctx, stats, config, matchers, predicatesPool, shard)
for i := range p.indexPredicate {
p.indexPredicate[i] = true
Expand Down
4 changes: 2 additions & 2 deletions pkg/ingester/lookupplan/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ func (p *costBasedPlannerPools) Release() {

type CostBasedPlanner struct {
config CostConfig
stats index.Statistics
stats Statistics
metrics Metrics
}

func NewCostBasedPlanner(metrics Metrics, statistics index.Statistics, config CostConfig) *CostBasedPlanner {
func NewCostBasedPlanner(metrics Metrics, statistics Statistics, config CostConfig) *CostBasedPlanner {
return &CostBasedPlanner{
config: config,
metrics: metrics,
Expand Down
2 changes: 1 addition & 1 deletion pkg/ingester/lookupplan/planner_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (p *PlannerFactory) CreatePlanner(meta tsdb.BlockMeta, reader tsdb.IndexRea
level.Info(logger).Log("msg", "skipping query planning for small block", "planning_threshold_series", p.config.MinSeriesPerBlockForQueryPlanning)
return NoopPlanner{}
}
stats, err := p.statsGenerator.Stats(meta, reader, p.config.LabelCardinalityForSmallerSketch, p.config.LabelCardinalityForLargerSketch)
stats, err := p.statsGenerator.Stats(meta, reader, p.config)
if err != nil {
level.Warn(logger).Log("msg", "failed to generate statistics; queries for this block won't use query planning", "err", err)
return NoopPlanner{}
Expand Down
15 changes: 7 additions & 8 deletions pkg/ingester/lookupplan/predicate.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"context"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/tsdb/index"
)

type planPredicate struct {
Expand All @@ -25,7 +24,7 @@ type planPredicate struct {
indexScanCost float64
}

func newPlanPredicate(ctx context.Context, m *labels.Matcher, stats index.Statistics, config CostConfig) planPredicate {
func newPlanPredicate(ctx context.Context, m *labels.Matcher, stats Statistics, config CostConfig) planPredicate {
pred := planPredicate{
matcher: m,
singleMatchCost: m.SingleMatchCost(),
Expand Down Expand Up @@ -66,7 +65,7 @@ func estimatePredicateIndexScanCost(pred planPredicate, m *labels.Matcher) float
panic("estimatePredicateIndexScanCost called with unhandled matcher type: " + m.Type.String() + m.String())
}

func estimatePredicateCardinality(ctx context.Context, m *labels.Matcher, stats index.Statistics, selectivity float64) uint64 {
func estimatePredicateCardinality(ctx context.Context, m *labels.Matcher, stats Statistics, selectivity float64) uint64 {
switch m.Type {
case labels.MatchEqual:
return estimateEqualMatcherCardinality(ctx, m, stats)
Expand All @@ -81,23 +80,23 @@ func estimatePredicateCardinality(ctx context.Context, m *labels.Matcher, stats
}
}

func estimateEqualMatcherCardinality(ctx context.Context, m *labels.Matcher, stats index.Statistics) uint64 {
// estimateEqualMatcherCardinality estimates how many series an equality matcher selects.
// A matcher that accepts the empty string (foo="") selects the series that do not have
// the label; any other matcher (foo="bar") selects the series carrying that exact value.
func estimateEqualMatcherCardinality(ctx context.Context, m *labels.Matcher, stats Statistics) uint64 {
	selectsMissingLabel := m.Matches("")
	if !selectsMissingLabel {
		// foo="bar": look up the cardinality of the single requested value.
		return stats.LabelValuesCardinality(ctx, m.Name, m.Value)
	}
	// foo="": count the series that do not have the label at all.
	return numSeriesWithoutLabel(ctx, m.Name, stats)
}

func estimateNotEqualMatcherCardinality(ctx context.Context, m *labels.Matcher, stats index.Statistics) uint64 {
// estimateNotEqualMatcherCardinality estimates how many series a not-equal matcher selects.
//
// For foo!="" it returns the number of series that have the label foo. For foo!="bar"
// it returns the total number of series minus those with foo="bar".
//
// The subtraction operates on unsigned integers. If the statistics report a value
// cardinality that is larger than TotalSeries, the result is clamped to 0 rather than
// wrapping around to a very large number.
func estimateNotEqualMatcherCardinality(ctx context.Context, m *labels.Matcher, stats Statistics) uint64 {
	if m.Value == "" { // foo!=""
		return stats.LabelValuesCardinality(ctx, m.Name)
	}

	// foo!="bar" matches all series except those with foo="bar"
	total := stats.TotalSeries()
	excluded := stats.LabelValuesCardinality(ctx, m.Name, m.Value)
	if excluded >= total {
		// Guard against unsigned wrap-around when the reported cardinality exceeds the total.
		return 0
	}
	return total - excluded
}

func estimateRegexMatcherCardinality(ctx context.Context, m *labels.Matcher, stats index.Statistics, selectivity float64) uint64 {
func estimateRegexMatcherCardinality(ctx context.Context, m *labels.Matcher, stats Statistics, selectivity float64) uint64 {
var matchedSeries uint64
if setMatches := m.SetMatches(); len(setMatches) > 0 { // foo=~"bar|baz", foo=~"|bar"
matchedSeries = stats.LabelValuesCardinality(ctx, m.Name, setMatches...)
Expand All @@ -113,7 +112,7 @@ func estimateRegexMatcherCardinality(ctx context.Context, m *labels.Matcher, sta
return matchedSeries
}

func estimateNotRegexMatcherCardinality(ctx context.Context, m *labels.Matcher, stats index.Statistics, selectivity float64) uint64 {
func estimateNotRegexMatcherCardinality(ctx context.Context, m *labels.Matcher, stats Statistics, selectivity float64) uint64 {
matchedSeries := uint64(0)

// Calculate how many series are matched by the regex as if the regex was positive.
Expand All @@ -135,7 +134,7 @@ func estimateNotRegexMatcherCardinality(ctx context.Context, m *labels.Matcher,
return matchedSeries
}

func numSeriesWithoutLabel(ctx context.Context, labelName string, stats index.Statistics) uint64 {
// numSeriesWithoutLabel returns the number of series that do not carry labelName:
// the total number of series minus the number of series that have the label.
//
// The subtraction operates on unsigned integers. If the statistics report a label
// cardinality that is larger than TotalSeries, the result is clamped to 0 rather
// than wrapping around to a very large number.
func numSeriesWithoutLabel(ctx context.Context, labelName string, stats Statistics) uint64 {
	total := stats.TotalSeries()
	withLabel := stats.LabelValuesCardinality(ctx, labelName)
	if withLabel >= total {
		// Guard against unsigned wrap-around when the reported cardinality exceeds the total.
		return 0
	}
	return total - withLabel
}

Expand Down
26 changes: 26 additions & 0 deletions pkg/ingester/lookupplan/statistics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// SPDX-License-Identifier: AGPL-3.0-only

package lookupplan

import "context"

// Statistics provides cardinality information about a TSDB component for query planning.
// All methods except TotalSeries take a context as their first argument.
type Statistics interface {
// TotalSeries returns the number of series in the TSDB component.
TotalSeries() uint64

// LabelValuesCount returns the number of values for a label name. If the given label name does not exist,
// it is valid to return 0.
LabelValuesCount(ctx context.Context, name string) uint64

// LabelValuesCardinality returns the cardinality of a given label name (i.e., the number of series which
// contain that label name). If values are provided, it returns the combined cardinality of all given values;
// otherwise, it returns the total cardinality across all values for the label name. If the label name does not exist,
// it returns 0.
// NOTE(review): callers subtract this result from TotalSeries using unsigned arithmetic; confirm whether
// implementations guarantee the result never exceeds TotalSeries.
LabelValuesCardinality(ctx context.Context, name string, values ...string) uint64

// SampleValues returns a representative sample of label values for the given label name.
// This is used for selectivity estimation of regex matchers.
// Returns nil if no sample is available.
SampleValues(ctx context.Context, name string) []string
}
Loading
Loading