Skip to content

Commit 8da0a2b

Browse files
authored
rule: Add support for query offset (#8158)
Support the upstream Prometheus rule manager "query offset" feature:
* Add support for a default rule query offset via a command-line flag.
* Add per-rule-group query_offset support.

Fixes: #7596
Signed-off-by: SuperQ <[email protected]>
1 parent 1f5bff2 commit 8da0a2b

File tree

4 files changed

+48
-8
lines changed

4 files changed

+48
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
2525
- [#8017](https://github.com/thanos-io/thanos/pull/8017) Store Gateway: Use native histogram for binary reader load and download duration and fixed download duration metric. #8017
2626
- [#8131](https://github.com/thanos-io/thanos/pull/8131) Store Gateway: Optimize regex matchers for .* and .+. #8131
2727
- [#7808](https://github.com/thanos-io/thanos/pull/7808) Query: Support chain deduplication algorithm.
28+
- [#8158](https://github.com/thanos-io/thanos/pull/8158) Rule: Add support for query offset.
2829

2930
### Changed
3031

cmd/thanos/rule.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ type ruleConfig struct {
9999

100100
resendDelay time.Duration
101101
evalInterval time.Duration
102+
queryOffset time.Duration
102103
outageTolerance time.Duration
103104
forGracePeriod time.Duration
104105
ruleFiles []string
@@ -150,6 +151,8 @@ func registerRule(app *extkingpin.App) {
150151
Default("1m").DurationVar(&conf.resendDelay)
151152
cmd.Flag("eval-interval", "The default evaluation interval to use.").
152153
Default("1m").DurationVar(&conf.evalInterval)
154+
cmd.Flag("rule-query-offset", "The default rule group query_offset duration to use.").
155+
Default("0s").DurationVar(&conf.queryOffset)
153156
cmd.Flag("for-outage-tolerance", "Max time to tolerate prometheus outage for restoring \"for\" state of alert.").
154157
Default("1h").DurationVar(&conf.outageTolerance)
155158
cmd.Flag("for-grace-period", "Minimum duration between alert and restored \"for\" state. This is maintained only for alerts with configured \"for\" time greater than grace period.").
@@ -607,14 +610,15 @@ func runRule(
607610
}
608611

609612
managerOpts := rules.ManagerOptions{
610-
NotifyFunc: notifyFunc,
611-
Logger: logutil.GoKitLogToSlog(logger),
612-
Appendable: appendable,
613-
ExternalURL: nil,
614-
Queryable: queryable,
615-
ResendDelay: conf.resendDelay,
616-
OutageTolerance: conf.outageTolerance,
617-
ForGracePeriod: conf.forGracePeriod,
613+
NotifyFunc: notifyFunc,
614+
Logger: logutil.GoKitLogToSlog(logger),
615+
Appendable: appendable,
616+
ExternalURL: nil,
617+
Queryable: queryable,
618+
ResendDelay: conf.resendDelay,
619+
OutageTolerance: conf.outageTolerance,
620+
ForGracePeriod: conf.forGracePeriod,
621+
DefaultRuleQueryOffset: func() time.Duration { return conf.queryOffset },
618622
}
619623
if conf.ruleConcurrentEval > 1 {
620624
managerOpts.MaxConcurrentEvals = conf.ruleConcurrentEval

docs/components/rule.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ The data of each Rule node can be labeled to satisfy the clusters labeling schem
1818
thanos rule \
1919
--data-dir "/path/to/data" \
2020
--eval-interval "30s" \
21+
--rule-query-offset "10s" \
2122
--rule-file "/path/to/rules/*.rules.yaml" \
2223
--alert.query-url "http://0.0.0.0:9090" \ # This tells what query URL to link to in UI.
2324
--alertmanagers.url "http://alert.thanos.io" \
@@ -64,6 +65,9 @@ name: <string>
6465
# How often rules in the group are evaluated.
6566
[ interval: <duration> | default = global.evaluation_interval ]
6667
68+
# Offset the rule evaluation timestamp of this particular group by the specified duration into the past.
69+
[ query_offset: <duration> | default = --rule-query-offset flag ]
70+
6771
rules:
6872
[ - <rule> ... ]
6973
```
@@ -471,6 +475,8 @@ Flags:
471475
Note that rules are not automatically detected,
472476
use SIGHUP or do HTTP POST /-/reload to re-read
473477
them.
478+
--rule-query-offset=0s The default rule group query_offset duration to
479+
use.
474480
--shipper.meta-file-name="thanos.shipper.json"
475481
the file to store shipper metadata in
476482
--shipper.upload-compacted

test/e2e/rule_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,16 @@ groups:
162162
expr: 1
163163
`
164164

165+
testRuleQueryOffset = `
166+
groups:
167+
- name: recording_query_offset
168+
interval: 1s
169+
query_offset: 1s
170+
rules:
171+
- record: metric_query_offset
172+
expr: 1
173+
`
174+
165175
amTimeout = model.Duration(10 * time.Second)
166176
)
167177

@@ -616,6 +626,25 @@ func TestRule_KeepFiringFor(t *testing.T) {
616626
})
617627
})
618628

629+
t.Run("rule query_offset", func(t *testing.T) {
630+
// Create a recording rule that will add the missing metric
631+
createRuleFile(t, filepath.Join(rulesPath, "record_metric_query_offset.yaml"), testRuleQueryOffset)
632+
reloadRulesHTTP(t, ctx, r.Endpoint("http"))
633+
634+
// Wait for metric to pop up
635+
queryWaitAndAssert(t, ctx, q.Endpoint("http"), func() string { return "metric_query_offset" }, time.Now, promclient.QueryOptions{
636+
Deduplicate: false,
637+
}, model.Vector{
638+
&model.Sample{
639+
Metric: model.Metric{
640+
"__name__": "metric_query_offset",
641+
"replica": "1",
642+
},
643+
Value: model.SampleValue(1),
644+
},
645+
})
646+
})
647+
619648
t.Run("keep_firing_for should continue triggering", func(t *testing.T) {
620649
// Alert should still be firing
621650
queryAndAssertSeries(t, ctx, q.Endpoint("http"), func() string { return "ALERTS" }, time.Now, promclient.QueryOptions{

0 commit comments

Comments
 (0)