Skip to content

Commit 10d0990

Browse files
committed
Fix per-cluster alerting on Slack and Pushover
1 parent db7bb27 commit 10d0990

File tree

2 files changed

+44
-3
lines changed

2 files changed

+44
-3
lines changed

cluster/cluster.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@ import (
2020
"time"
2121

2222
"github.com/BurntSushi/toml"
23+
"github.com/bluele/logrus_slack"
2324
"github.com/signal18/replication-manager/cluster/configurator"
2425
"github.com/signal18/replication-manager/cluster/nbc"
2526
"github.com/signal18/replication-manager/config"
2627
v3 "github.com/signal18/replication-manager/repmanv3"
2728
"github.com/signal18/replication-manager/router/maxscale"
2829
"github.com/signal18/replication-manager/utils/cron"
2930
"github.com/signal18/replication-manager/utils/dbhelper"
31+
"github.com/signal18/replication-manager/utils/logrus/hooks/pushover"
3032
"github.com/signal18/replication-manager/utils/s18log"
3133
"github.com/signal18/replication-manager/utils/state"
3234
log "github.com/sirupsen/logrus"
@@ -95,7 +97,9 @@ type Cluster struct {
9597
DBIndexSize int64 `json:"dbIndexSize"`
9698
Connections int `json:"connections"`
9799
QPS int64 `json:"qps"`
100+
LogVault *log.Logger `json:"-"`
98101
Log s18log.HttpLog `json:"log"`
102+
LogSlack *log.Logger `json:"-"`
99103
JobResults map[string]*JobResult `json:"jobResults"`
100104
Grants map[string]string `json:"-"`
101105
tlog *s18log.TermLog `json:"-"`
@@ -252,7 +256,7 @@ const (
252256
)
253257

254258
// Init initial cluster definition
255-
func (cluster *Cluster) Init(conf config.Config, cfgGroup string, tlog *s18log.TermLog, log *s18log.HttpLog, termlength int, runUUID string, repmgrVersion string, repmgrHostname string, key []byte) error {
259+
func (cluster *Cluster) Init(conf config.Config, cfgGroup string, tlog *s18log.TermLog, loghttp *s18log.HttpLog, termlength int, runUUID string, repmgrVersion string, repmgrHostname string, key []byte) error {
256260
cluster.SqlErrorLog = logsql.New()
257261
cluster.SqlGeneralLog = logsql.New()
258262
cluster.crcTable = crc64.MakeTable(crc64.ECMA) // http://golang.org/pkg/hash/crc64/#pkg-constants
@@ -270,7 +274,7 @@ func (cluster *Cluster) Init(conf config.Config, cfgGroup string, tlog *s18log.T
270274
cluster.testStopCluster = true
271275
cluster.testStartCluster = true
272276
cluster.tlog = tlog
273-
cluster.htlog = log
277+
cluster.htlog = loghttp
274278
cluster.termlength = termlength
275279
cluster.Name = cfgGroup
276280
cluster.WorkingDir = conf.WorkingDir + "/" + cluster.Name
@@ -307,6 +311,29 @@ func (cluster *Cluster) Init(conf config.Config, cfgGroup string, tlog *s18log.T
307311
os.MkdirAll(cluster.Conf.WorkingDir+"/"+cluster.Name, os.ModePerm)
308312
}
309313

314+
cluster.LogVault = log.New()
315+
316+
if cluster.Conf.PushoverAppToken != "" && cluster.Conf.PushoverUserToken != "" {
317+
cluster.LogVault.AddHook(
318+
pushover.NewHook(cluster.Conf.PushoverAppToken, cluster.Conf.PushoverUserToken),
319+
)
320+
cluster.LogVault.SetLevel(log.WarnLevel)
321+
}
322+
323+
cluster.LogSlack = log.New()
324+
325+
if cluster.Conf.SlackURL != "" {
326+
cluster.LogSlack.AddHook(&logrus_slack.SlackHook{
327+
HookURL: cluster.Conf.SlackURL,
328+
AcceptedLevels: logrus_slack.LevelThreshold(log.WarnLevel),
329+
Channel: cluster.Conf.SlackChannel,
330+
IconEmoji: ":ghost:",
331+
Username: cluster.Conf.SlackUser,
332+
Timeout: 5 * time.Second, // request timeout for calling slack api
333+
})
334+
}
335+
cluster.LogPrintf("ALERT", "Replication manager init cluster version : %s", cluster.Conf.Version)
336+
310337
hookerr, err := s18log.NewRotateFileHook(s18log.RotateFileConfig{
311338
Filename: cluster.WorkingDir + "/sql_error.log",
312339
MaxSize: cluster.Conf.LogRotateMaxSize,

cluster/cluster_log.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,32 @@ func (cluster *Cluster) LogPrintf(level string, format string, args ...interface
171171
switch level {
172172
case "ERROR":
173173
log.WithField("cluster", cluster.Name).Errorf(cliformat, args...)
174+
if cluster.Conf.SlackURL != "" {
175+
cluster.LogSlack.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Errorf(cliformat, args...)
176+
}
177+
174178
case "INFO":
175179
log.WithField("cluster", cluster.Name).Infof(cliformat, args...)
176180
case "DEBUG":
177181
log.WithField("cluster", cluster.Name).Debugf(cliformat, args...)
178182
case "WARN":
179183
log.WithField("cluster", cluster.Name).Warnf(cliformat, args...)
184+
if cluster.Conf.SlackURL != "" {
185+
cluster.LogSlack.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Errorf(cliformat, args...)
186+
}
180187
case "TEST":
181188
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "test"}).Infof(cliformat, args...)
182189
case "BENCH":
183190
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "benchmark"}).Infof(cliformat, args...)
184191
case "ALERT":
185-
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Warnf(cliformat, args...)
192+
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Errorf(cliformat, args...)
193+
if cluster.Conf.SlackURL != "" {
194+
cluster.LogSlack.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Errorf(cliformat, args...)
195+
}
196+
if cluster.Conf.PushoverAppToken != "" && cluster.Conf.PushoverUserToken != "" {
197+
cluster.LogVault.WithFields(log.Fields{"cluster": cluster.Name, "type": "alert"}).Errorf(cliformat, args...)
198+
199+
}
186200
case "STATE":
187201
status := cliformat[0:6]
188202
code := cliformat[7:15]

0 commit comments

Comments
 (0)