Skip to content

Commit 4a7d9f1

Browse files
committed
feat: add support for custom labels in Prometheus metrics
- Add a `Labels` field to the `Config` struct to include additional labels for metrics
- Add a `Labels` field to the `Endpoint` struct to include key-value pairs for endpoint metrics
- Join the `CheckSSHBanner` call in the `call` method of the `Endpoint` struct onto a single line
- Import the `metrics` package in `main.go` and initialize Prometheus metrics in `start`
- Export `initializePrometheusMetrics` as `InitializePrometheusMetrics`, make it accept a `Config` parameter, and include the configured labels in the metric definitions
- Update `PublishMetricsForEndpoint` to accept the label names and include the matching endpoint label values in metric values
- Update `metrics_test.go` to initialize Prometheus metrics with a `Config` and pass labels to `PublishMetricsForEndpoint`
- Pass labels to the `monitor` and `execute` functions in `watchdog.go` and use them in metric publishing

Signed-off-by: appleboy <[email protected]>
1 parent dd839be commit 4a7d9f1

File tree

6 files changed

+53
-36
lines changed

6 files changed

+53
-36
lines changed

config/config.go

+3
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ type Config struct {
5959
// Metrics Whether to expose metrics at /metrics
6060
Metrics bool `yaml:"metrics,omitempty"`
6161

62+
// Labels is the list of label names added to all metrics; the value for each name is read from the endpoint's own Labels map
63+
Labels []string `yaml:"labels,omitempty"`
64+
6265
// SkipInvalidConfigUpdate Whether to make the application ignore invalid configuration
6366
// if the configuration file is updated while the application is running
6467
SkipInvalidConfigUpdate bool `yaml:"skip-invalid-config-update,omitempty"`

config/endpoint/endpoint.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ type Endpoint struct {
9595
// Headers of the request
9696
Headers map[string]string `yaml:"headers,omitempty"`
9797

98+
// Labels are key-value pairs attached to the endpoint's metrics; a configured label name with no entry here is published with an empty value
99+
Labels map[string]string `yaml:"labels,omitempty"`
100+
98101
// Interval is the duration to wait between every status check
99102
Interval time.Duration `yaml:"interval,omitempty"`
100103

@@ -365,8 +368,7 @@ func (e *Endpoint) call(result *Result) {
365368
} else if endpointType == TypeSSH {
366369
// If there's no username/password specified, attempt to validate just the SSH banner
367370
if len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0 {
368-
result.Connected, result.HTTPStatus, err =
369-
client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
371+
result.Connected, result.HTTPStatus, err = client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
370372
if err != nil {
371373
result.AddError(err.Error())
372374
return

main.go

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/TwiN/gatus/v5/config"
1111
"github.com/TwiN/gatus/v5/controller"
12+
"github.com/TwiN/gatus/v5/metrics"
1213
"github.com/TwiN/gatus/v5/storage/store"
1314
"github.com/TwiN/gatus/v5/watchdog"
1415
"github.com/TwiN/logr"
@@ -49,6 +50,8 @@ func main() {
4950

5051
func start(cfg *config.Config) {
5152
go controller.Handle(cfg)
53+
// Initialize the metrics
54+
metrics.InitializePrometheusMetrics(cfg)
5255
watchdog.Monitor(cfg)
5356
go listenToConfigurationFileChanges(cfg)
5457
}

metrics/metrics.go

+25-21
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package metrics
33
import (
44
"strconv"
55

6+
"github.com/TwiN/gatus/v5/config"
67
"github.com/TwiN/gatus/v5/config/endpoint"
78
"github.com/prometheus/client_golang/prometheus"
89
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -11,8 +12,6 @@ import (
1112
const namespace = "gatus" // The prefix of the metrics
1213

1314
var (
14-
initializedMetrics bool // Whether the metrics have been initialized
15-
1615
resultTotal *prometheus.CounterVec
1716
resultDurationSeconds *prometheus.GaugeVec
1817
resultConnectedTotal *prometheus.CounterVec
@@ -21,64 +20,69 @@ var (
2120
resultEndpointSuccess *prometheus.GaugeVec
2221
)
2322

24-
func initializePrometheusMetrics() {
23+
func InitializePrometheusMetrics(cfg *config.Config) {
2524
resultTotal = promauto.NewCounterVec(prometheus.CounterOpts{
2625
Namespace: namespace,
2726
Name: "results_total",
2827
Help: "Number of results per endpoint",
29-
}, []string{"key", "group", "name", "type", "success"})
28+
}, append([]string{"key", "group", "name", "type", "success"}, cfg.Labels...))
3029
resultDurationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
3130
Namespace: namespace,
3231
Name: "results_duration_seconds",
3332
Help: "Duration of the request in seconds",
34-
}, []string{"key", "group", "name", "type"})
33+
}, append([]string{"key", "group", "name", "type"}, cfg.Labels...))
3534
resultConnectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
3635
Namespace: namespace,
3736
Name: "results_connected_total",
3837
Help: "Total number of results in which a connection was successfully established",
39-
}, []string{"key", "group", "name", "type"})
38+
}, append([]string{"key", "group", "name", "type"}, cfg.Labels...))
4039
resultCodeTotal = promauto.NewCounterVec(prometheus.CounterOpts{
4140
Namespace: namespace,
4241
Name: "results_code_total",
4342
Help: "Total number of results by code",
44-
}, []string{"key", "group", "name", "type", "code"})
43+
}, append([]string{"key", "group", "name", "type", "code"}, cfg.Labels...))
4544
resultCertificateExpirationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
4645
Namespace: namespace,
4746
Name: "results_certificate_expiration_seconds",
4847
Help: "Number of seconds until the certificate expires",
49-
}, []string{"key", "group", "name", "type"})
48+
}, append([]string{"key", "group", "name", "type"}, cfg.Labels...))
5049
resultEndpointSuccess = promauto.NewGaugeVec(prometheus.GaugeOpts{
5150
Namespace: namespace,
5251
Name: "results_endpoint_success",
5352
Help: "Displays whether or not the endpoint was a success",
54-
}, []string{"key", "group", "name", "type"})
53+
}, append([]string{"key", "group", "name", "type"}, cfg.Labels...))
5554
}
5655

5756
// PublishMetricsForEndpoint publishes metrics for the given endpoint and its result.
5857
// These metrics will be exposed at /metrics if the metrics are enabled
59-
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result) {
60-
if !initializedMetrics {
61-
initializePrometheusMetrics()
62-
initializedMetrics = true
58+
func PublishMetricsForEndpoint(labels []string, ep *endpoint.Endpoint, result *endpoint.Result) {
59+
labelValues := []string{}
60+
for _, label := range labels {
61+
if value, ok := ep.Labels[label]; ok {
62+
labelValues = append(labelValues, value)
63+
} else {
64+
labelValues = append(labelValues, "")
65+
}
6366
}
67+
6468
endpointType := ep.Type()
65-
resultTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)).Inc()
66-
resultDurationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.Duration.Seconds())
69+
resultTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)}, labelValues...)...).Inc()
70+
resultDurationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.Duration.Seconds())
6771
if result.Connected {
68-
resultConnectedTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Inc()
72+
resultConnectedTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Inc()
6973
}
7074
if result.DNSRCode != "" {
71-
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode).Inc()
75+
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode}, labelValues...)...).Inc()
7276
}
7377
if result.HTTPStatus != 0 {
74-
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)).Inc()
78+
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)}, labelValues...)...).Inc()
7579
}
7680
if result.CertificateExpiration != 0 {
77-
resultCertificateExpirationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.CertificateExpiration.Seconds())
81+
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
7882
}
7983
if result.Success {
80-
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(1)
84+
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
8185
} else {
82-
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(0)
86+
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(0)
8387
}
8488
}

metrics/metrics_test.go

+12-7
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,18 @@ import (
55
"testing"
66
"time"
77

8+
"github.com/TwiN/gatus/v5/config"
89
"github.com/TwiN/gatus/v5/config/endpoint"
910
"github.com/TwiN/gatus/v5/config/endpoint/dns"
1011
"github.com/prometheus/client_golang/prometheus"
1112
"github.com/prometheus/client_golang/prometheus/testutil"
1213
)
1314

1415
func TestPublishMetricsForEndpoint(t *testing.T) {
16+
InitializePrometheusMetrics(&config.Config{})
17+
1518
httpEndpoint := &endpoint.Endpoint{Name: "http-ep-name", Group: "http-ep-group", URL: "https://example.org"}
16-
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
19+
PublishMetricsForEndpoint([]string{}, httpEndpoint, &endpoint.Result{
1720
HTTPStatus: 200,
1821
Connected: true,
1922
Duration: 123 * time.Millisecond,
@@ -47,7 +50,7 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
4750
if err != nil {
4851
t.Errorf("Expected no errors but got: %v", err)
4952
}
50-
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
53+
PublishMetricsForEndpoint([]string{}, httpEndpoint, &endpoint.Result{
5154
HTTPStatus: 200,
5255
Connected: true,
5356
Duration: 125 * time.Millisecond,
@@ -82,11 +85,13 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
8285
if err != nil {
8386
t.Errorf("Expected no errors but got: %v", err)
8487
}
85-
dnsEndpoint := &endpoint.Endpoint{Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
86-
QueryType: "A",
87-
QueryName: "example.com.",
88-
}}
89-
PublishMetricsForEndpoint(dnsEndpoint, &endpoint.Result{
88+
dnsEndpoint := &endpoint.Endpoint{
89+
Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
90+
QueryType: "A",
91+
QueryName: "example.com.",
92+
},
93+
}
94+
PublishMetricsForEndpoint([]string{}, dnsEndpoint, &endpoint.Result{
9095
DNSRCode: "NOERROR",
9196
Connected: true,
9297
Duration: 50 * time.Millisecond,

watchdog/watchdog.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -31,31 +31,31 @@ func Monitor(cfg *config.Config) {
3131
if endpoint.IsEnabled() {
3232
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
3333
time.Sleep(777 * time.Millisecond)
34-
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
34+
go monitor(cfg.Labels, endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
3535
}
3636
}
3737
}
3838

3939
// monitor a single endpoint in a loop
40-
func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
40+
func monitor(labels []string, ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
4141
// Run it immediately on start
42-
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
42+
execute(labels, ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
4343
// Loop for the next executions
4444
for {
4545
select {
4646
case <-ctx.Done():
4747
logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
4848
return
4949
case <-time.After(ep.Interval):
50-
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
50+
execute(labels, ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
5151
}
5252
}
5353
// Just in case somebody wandered all the way to here and wonders, "what about ExternalEndpoints?"
5454
// Alerting is checked every time an external endpoint is pushed to Gatus, so they're not monitored
5555
// periodically like they are for normal endpoints.
5656
}
5757

58-
func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
58+
func execute(labels []string, ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
5959
if !disableMonitoringLock {
6060
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
6161
// could cause performance issues and return inaccurate results
@@ -70,7 +70,7 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
7070
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
7171
result := ep.EvaluateHealth()
7272
if enabledMetrics {
73-
metrics.PublishMetricsForEndpoint(ep, result)
73+
metrics.PublishMetricsForEndpoint(labels, ep, result)
7474
}
7575
UpdateEndpointStatuses(ep, result)
7676
if logr.GetThreshold() == logr.LevelDebug && !result.Success {

0 commit comments

Comments
 (0)