Skip to content

Commit 0d7fa59

Browse files
committed
feat: configurable provider channel buffer length
This change makes the memory provider channel buffer length configurable. The previously hard-coded value of 200 is used as default. This should result in better performance and less timeouts during sudden alert spikes, specially if the pipeline speed is affected by slow receivers. Signed-off-by: Siavash Safi <[email protected]>
1 parent 7980594 commit 0d7fa59

File tree

2 files changed

+28
-22
lines changed

2 files changed

+28
-22
lines changed

cmd/alertmanager/main.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,14 @@ func run() int {
143143
}
144144

145145
var (
146-
configFile = kingpin.Flag("config.file", "Alertmanager configuration file name.").Default("alertmanager.yml").String()
147-
dataDir = kingpin.Flag("storage.path", "Base path for data storage.").Default("data/").String()
148-
retention = kingpin.Flag("data.retention", "How long to keep data for.").Default("120h").Duration()
149-
maintenanceInterval = kingpin.Flag("data.maintenance-interval", "Interval between garbage collection and snapshotting to disk of the silences and the notification logs.").Default("15m").Duration()
150-
maxSilences = kingpin.Flag("silences.max-silences", "Maximum number of silences, including expired silences. If negative or zero, no limit is set.").Default("0").Int()
151-
maxSilenceSizeBytes = kingpin.Flag("silences.max-silence-size-bytes", "Maximum silence size in bytes. If negative or zero, no limit is set.").Default("0").Int()
152-
alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
146+
configFile = kingpin.Flag("config.file", "Alertmanager configuration file name.").Default("alertmanager.yml").String()
147+
dataDir = kingpin.Flag("storage.path", "Base path for data storage.").Default("data/").String()
148+
retention = kingpin.Flag("data.retention", "How long to keep data for.").Default("120h").Duration()
149+
maintenanceInterval = kingpin.Flag("data.maintenance-interval", "Interval between garbage collection and snapshotting to disk of the silences and the notification logs.").Default("15m").Duration()
150+
maxSilences = kingpin.Flag("silences.max-silences", "Maximum number of silences, including expired silences. If negative or zero, no limit is set.").Default("0").Int()
151+
maxSilenceSizeBytes = kingpin.Flag("silences.max-silence-size-bytes", "Maximum silence size in bytes. If negative or zero, no limit is set.").Default("0").Int()
152+
alertChannelBufferLength = kingpin.Flag("alerts.channel-buffer-length", "Alert channel buffer length").Default("200").Int()
153+
alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
153154

154155
webConfig = webflag.AddFlags(kingpin.CommandLine, ":9093")
155156
externalURL = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
@@ -342,7 +343,7 @@ func run() int {
342343
go peer.Settle(ctx, *gossipInterval*10)
343344
}
344345

345-
alerts, err := mem.NewAlerts(context.Background(), marker, *alertGCInterval, nil, logger, prometheus.DefaultRegisterer)
346+
alerts, err := mem.NewAlerts(context.Background(), marker, *alertChannelBufferLength, *alertGCInterval, nil, logger, prometheus.DefaultRegisterer)
346347
if err != nil {
347348
logger.Error("error creating memory provider", "err", err)
348349
return 1

provider/mem/mem.go

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package mem
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"log/slog"
1920
"sync"
2021
"time"
@@ -27,8 +28,6 @@ import (
2728
"github.com/prometheus/alertmanager/types"
2829
)
2930

30-
const alertChannelLength = 200
31-
3231
// Alerts gives access to a set of alerts. All methods are goroutine-safe.
3332
type Alerts struct {
3433
cancel context.CancelFunc
@@ -38,8 +37,9 @@ type Alerts struct {
3837
alerts *store.Alerts
3938
marker types.AlertMarker
4039

41-
listeners map[int]listeningAlerts
42-
next int
40+
channelLength int
41+
listeners map[int]listeningAlerts
42+
next int
4343

4444
callback AlertStoreCallback
4545

@@ -85,20 +85,25 @@ func (a *Alerts) registerMetrics(r prometheus.Registerer) {
8585
}
8686

8787
// NewAlerts returns a new alert provider.
88-
func NewAlerts(ctx context.Context, m types.AlertMarker, intervalGC time.Duration, alertCallback AlertStoreCallback, l *slog.Logger, r prometheus.Registerer) (*Alerts, error) {
88+
func NewAlerts(ctx context.Context, m types.AlertMarker, channelLength int, intervalGC time.Duration, alertCallback AlertStoreCallback, l *slog.Logger, r prometheus.Registerer) (*Alerts, error) {
8989
if alertCallback == nil {
9090
alertCallback = noopCallback{}
9191
}
9292

93+
if channelLength < 1 {
94+
return nil, fmt.Errorf("channel length has to be > zero, provided %d", channelLength)
95+
}
96+
9397
ctx, cancel := context.WithCancel(ctx)
9498
a := &Alerts{
95-
marker: m,
96-
alerts: store.NewAlerts(),
97-
cancel: cancel,
98-
listeners: map[int]listeningAlerts{},
99-
next: 0,
100-
logger: l.With("component", "provider"),
101-
callback: alertCallback,
99+
marker: m,
100+
alerts: store.NewAlerts(),
101+
cancel: cancel,
102+
channelLength: channelLength,
103+
listeners: map[int]listeningAlerts{},
104+
next: 0,
105+
logger: l.With("component", "provider"),
106+
callback: alertCallback,
102107
}
103108

104109
if r != nil {
@@ -170,7 +175,7 @@ func (a *Alerts) Subscribe() provider.AlertIterator {
170175
var (
171176
done = make(chan struct{})
172177
alerts = a.alerts.List()
173-
ch = make(chan *types.Alert, max(len(alerts), alertChannelLength))
178+
ch = make(chan *types.Alert, max(len(alerts), a.channelLength))
174179
)
175180

176181
for _, a := range alerts {
@@ -187,7 +192,7 @@ func (a *Alerts) Subscribe() provider.AlertIterator {
187192
// pending notifications.
188193
func (a *Alerts) GetPending() provider.AlertIterator {
189194
var (
190-
ch = make(chan *types.Alert, alertChannelLength)
195+
ch = make(chan *types.Alert, a.channelLength)
191196
done = make(chan struct{})
192197
)
193198
a.mtx.Lock()

0 commit comments

Comments
 (0)