Skip to content

Commit f045b90

Browse files
committed
feat: add dynamic labels support for Prometheus metrics
- Add `toBoolPtr` function to convert a bool to a bool pointer
- Add `contains` function to check if a key exists in a slice
- Add `GetMetricLabels` method to `Config` struct to return unique metric labels from enabled endpoints
- Change file permission notation from `0644` to `0o644` in `config_test.go`
- Add `Labels` field to `Endpoint` struct for key-value pairs
- Initialize Prometheus metrics with dynamic labels from configuration
- Modify `PublishMetricsForEndpoint` to include dynamic labels
- Add test for `GetMetricLabels` method in `config_test.go`
- Update `watchdog` to pass labels to monitoring and execution functions

Signed-off-by: appleboy <[email protected]>
1 parent dd839be commit f045b90

File tree

7 files changed

+205
-40
lines changed

7 files changed

+205
-40
lines changed

config/config.go

+38
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,44 @@ type Config struct {
103103
lastFileModTime time.Time // last modification time
104104
}
105105

106+
// toBoolPtr returns a pointer to a freshly allocated bool holding the value of b.
// It is a convenience for populating optional *bool fields from a literal.
func toBoolPtr(b bool) *bool {
	ptr := new(bool)
	*ptr = b
	return ptr
}
110+
111+
// contains reports whether key equals at least one element of slice.
// A nil or empty slice contains nothing, so the result is false.
func contains[T comparable](slice []T, key T) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == key {
			return true
		}
	}
	return false
}
120+
121+
// GetMetricLabels returns a slice of unique metric labels from all enabled endpoints
122+
// in the configuration. It iterates through each endpoint, checks if it is enabled,
123+
// and then collects unique labels from the endpoint's labels map.
124+
//
125+
// Returns:
126+
//
127+
// []string: A slice of unique metric labels.
128+
func (config *Config) GetMetricLabels() []string {
129+
labels := make([]string, 0)
130+
for _, ep := range config.Endpoints {
131+
if !ep.IsEnabled() {
132+
continue
133+
}
134+
for label := range ep.Labels {
135+
if contains(labels, label) {
136+
continue
137+
}
138+
labels = append(labels, label)
139+
}
140+
}
141+
return labels
142+
}
143+
106144
func (config *Config) GetEndpointByKey(key string) *endpoint.Endpoint {
107145
for i := 0; i < len(config.Endpoints); i++ {
108146
ep := config.Endpoints[i]

config/config_test.go

+115-4
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ endpoints:
237237
for _, scenario := range scenarios {
238238
t.Run(scenario.name, func(t *testing.T) {
239239
for path, content := range scenario.pathAndFiles {
240-
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0644); err != nil {
240+
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0o644); err != nil {
241241
t.Fatalf("[%s] failed to write file: %v", scenario.name, err)
242242
}
243243
}
@@ -282,7 +282,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
282282
url: https://twin.sh/health
283283
conditions:
284284
- "[STATUS] == 200"
285-
`), 0644)
285+
`), 0o644)
286286

287287
t.Run("config-file-as-config-path", func(t *testing.T) {
288288
config, err := LoadConfiguration(configFilePath)
@@ -298,7 +298,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
298298
- name: website
299299
url: https://twin.sh/health
300300
conditions:
301-
- "[STATUS] == 200"`), 0644); err != nil {
301+
- "[STATUS] == 200"`), 0o644); err != nil {
302302
t.Fatalf("failed to overwrite config file: %v", err)
303303
}
304304
if !config.HasLoadedConfigurationBeenModified() {
@@ -315,7 +315,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
315315
}
316316
time.Sleep(time.Second) // Because the file mod time only has second precision, we have to wait for a second
317317
// Update the config file
318-
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0644); err != nil {
318+
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0o644); err != nil {
319319
t.Fatalf("failed to overwrite config file: %v", err)
320320
}
321321
if !config.HasLoadedConfigurationBeenModified() {
@@ -1938,3 +1938,114 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
19381938
})
19391939
}
19401940
}
1941+
1942+
func TestConfig_GetMetricLabels(t *testing.T) {
1943+
tests := []struct {
1944+
name string
1945+
config *Config
1946+
expected []string
1947+
}{
1948+
{
1949+
name: "no-endpoints",
1950+
config: &Config{
1951+
Endpoints: []*endpoint.Endpoint{},
1952+
},
1953+
expected: []string{},
1954+
},
1955+
{
1956+
name: "single-endpoint-no-labels",
1957+
config: &Config{
1958+
Endpoints: []*endpoint.Endpoint{
1959+
{
1960+
Name: "endpoint1",
1961+
URL: "https://example.com",
1962+
},
1963+
},
1964+
},
1965+
expected: []string{},
1966+
},
1967+
{
1968+
name: "single-endpoint-with-labels",
1969+
config: &Config{
1970+
Endpoints: []*endpoint.Endpoint{
1971+
{
1972+
Name: "endpoint1",
1973+
URL: "https://example.com",
1974+
Enabled: toBoolPtr(true),
1975+
Labels: map[string]string{
1976+
"env": "production",
1977+
"team": "backend",
1978+
},
1979+
},
1980+
},
1981+
},
1982+
expected: []string{"env", "team"},
1983+
},
1984+
{
1985+
name: "multiple-endpoints-with-labels",
1986+
config: &Config{
1987+
Endpoints: []*endpoint.Endpoint{
1988+
{
1989+
Name: "endpoint1",
1990+
URL: "https://example.com",
1991+
Enabled: toBoolPtr(true),
1992+
Labels: map[string]string{
1993+
"env": "production",
1994+
"team": "backend",
1995+
"module": "auth",
1996+
},
1997+
},
1998+
{
1999+
Name: "endpoint2",
2000+
URL: "https://example.org",
2001+
Enabled: toBoolPtr(true),
2002+
Labels: map[string]string{
2003+
"env": "staging",
2004+
"team": "frontend",
2005+
},
2006+
},
2007+
},
2008+
},
2009+
expected: []string{"env", "team", "module"},
2010+
},
2011+
{
2012+
name: "multiple-endpoints-with-some-disabled",
2013+
config: &Config{
2014+
Endpoints: []*endpoint.Endpoint{
2015+
{
2016+
Name: "endpoint1",
2017+
URL: "https://example.com",
2018+
Enabled: toBoolPtr(true),
2019+
Labels: map[string]string{
2020+
"env": "production",
2021+
"team": "backend",
2022+
},
2023+
},
2024+
{
2025+
Name: "endpoint2",
2026+
URL: "https://example.org",
2027+
Enabled: toBoolPtr(false),
2028+
Labels: map[string]string{
2029+
"module": "auth",
2030+
},
2031+
},
2032+
},
2033+
},
2034+
expected: []string{"env", "team"},
2035+
},
2036+
}
2037+
2038+
for _, tt := range tests {
2039+
t.Run(tt.name, func(t *testing.T) {
2040+
labels := tt.config.GetMetricLabels()
2041+
if len(labels) != len(tt.expected) {
2042+
t.Errorf("expected %d labels, got %d", len(tt.expected), len(labels))
2043+
}
2044+
for _, label := range tt.expected {
2045+
if !contains(labels, label) {
2046+
t.Errorf("expected label %s to be present", label)
2047+
}
2048+
}
2049+
})
2050+
}
2051+
}

config/endpoint/endpoint.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ type Endpoint struct {
9595
// Headers of the request
9696
Headers map[string]string `yaml:"headers,omitempty"`
9797

98+
// Labels are key-value pairs attached to the endpoint's metrics as additional labels
99+
Labels map[string]string `yaml:"labels,omitempty"`
100+
98101
// Interval is the duration to wait between every status check
99102
Interval time.Duration `yaml:"interval,omitempty"`
100103

@@ -365,8 +368,7 @@ func (e *Endpoint) call(result *Result) {
365368
} else if endpointType == TypeSSH {
366369
// If there's no username/password specified, attempt to validate just the SSH banner
367370
if len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0 {
368-
result.Connected, result.HTTPStatus, err =
369-
client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
371+
result.Connected, result.HTTPStatus, err = client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
370372
if err != nil {
371373
result.AddError(err.Error())
372374
return

main.go

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/TwiN/gatus/v5/config"
1111
"github.com/TwiN/gatus/v5/controller"
12+
"github.com/TwiN/gatus/v5/metrics"
1213
"github.com/TwiN/gatus/v5/storage/store"
1314
"github.com/TwiN/gatus/v5/watchdog"
1415
"github.com/TwiN/logr"
@@ -49,6 +50,8 @@ func main() {
4950

5051
// start brings up the components driven by cfg: it launches the controller in
// its own goroutine, initializes the Prometheus metrics, calls watchdog.Monitor
// and finally launches the configuration file change listener in a goroutine.
func start(cfg *config.Config) {
5152
go controller.Handle(cfg)
53+
// Initialize the Prometheus metrics from cfg before the watchdog is started.
// NOTE(review): presumably the watchdog publishes results to these metrics, so
// they must exist first — confirm against the watchdog package.
54+
metrics.InitializePrometheusMetrics(cfg)
5255
watchdog.Monitor(cfg)
5356
go listenToConfigurationFileChanges(cfg)
5457
}

metrics/metrics.go

+26-21
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package metrics
33
import (
44
"strconv"
55

6+
"github.com/TwiN/gatus/v5/config"
67
"github.com/TwiN/gatus/v5/config/endpoint"
78
"github.com/prometheus/client_golang/prometheus"
89
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -11,8 +12,6 @@ import (
1112
const namespace = "gatus" // The prefix of the metrics
1213

1314
var (
14-
initializedMetrics bool // Whether the metrics have been initialized
15-
1615
resultTotal *prometheus.CounterVec
1716
resultDurationSeconds *prometheus.GaugeVec
1817
resultConnectedTotal *prometheus.CounterVec
@@ -21,64 +20,70 @@ var (
2120
resultEndpointSuccess *prometheus.GaugeVec
2221
)
2322

24-
func initializePrometheusMetrics() {
23+
func InitializePrometheusMetrics(cfg *config.Config) {
24+
labels := cfg.GetMetricLabels()
2525
resultTotal = promauto.NewCounterVec(prometheus.CounterOpts{
2626
Namespace: namespace,
2727
Name: "results_total",
2828
Help: "Number of results per endpoint",
29-
}, []string{"key", "group", "name", "type", "success"})
29+
}, append([]string{"key", "group", "name", "type", "success"}, labels...))
3030
resultDurationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
3131
Namespace: namespace,
3232
Name: "results_duration_seconds",
3333
Help: "Duration of the request in seconds",
34-
}, []string{"key", "group", "name", "type"})
34+
}, append([]string{"key", "group", "name", "type"}, labels...))
3535
resultConnectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
3636
Namespace: namespace,
3737
Name: "results_connected_total",
3838
Help: "Total number of results in which a connection was successfully established",
39-
}, []string{"key", "group", "name", "type"})
39+
}, append([]string{"key", "group", "name", "type"}, labels...))
4040
resultCodeTotal = promauto.NewCounterVec(prometheus.CounterOpts{
4141
Namespace: namespace,
4242
Name: "results_code_total",
4343
Help: "Total number of results by code",
44-
}, []string{"key", "group", "name", "type", "code"})
44+
}, append([]string{"key", "group", "name", "type", "code"}, labels...))
4545
resultCertificateExpirationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
4646
Namespace: namespace,
4747
Name: "results_certificate_expiration_seconds",
4848
Help: "Number of seconds until the certificate expires",
49-
}, []string{"key", "group", "name", "type"})
49+
}, append([]string{"key", "group", "name", "type"}, labels...))
5050
resultEndpointSuccess = promauto.NewGaugeVec(prometheus.GaugeOpts{
5151
Namespace: namespace,
5252
Name: "results_endpoint_success",
5353
Help: "Displays whether or not the endpoint was a success",
54-
}, []string{"key", "group", "name", "type"})
54+
}, append([]string{"key", "group", "name", "type"}, labels...))
5555
}
5656

5757
// PublishMetricsForEndpoint publishes metrics for the given endpoint and its result.
5858
// These metrics will be exposed at /metrics if the metrics are enabled
59-
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result) {
60-
if !initializedMetrics {
61-
initializePrometheusMetrics()
62-
initializedMetrics = true
59+
func PublishMetricsForEndpoint(labels []string, ep *endpoint.Endpoint, result *endpoint.Result) {
60+
labelValues := []string{}
61+
for _, label := range labels {
62+
if value, ok := ep.Labels[label]; ok {
63+
labelValues = append(labelValues, value)
64+
} else {
65+
labelValues = append(labelValues, "")
66+
}
6367
}
68+
6469
endpointType := ep.Type()
65-
resultTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)).Inc()
66-
resultDurationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.Duration.Seconds())
70+
resultTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)}, labelValues...)...).Inc()
71+
resultDurationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.Duration.Seconds())
6772
if result.Connected {
68-
resultConnectedTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Inc()
73+
resultConnectedTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Inc()
6974
}
7075
if result.DNSRCode != "" {
71-
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode).Inc()
76+
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode}, labelValues...)...).Inc()
7277
}
7378
if result.HTTPStatus != 0 {
74-
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)).Inc()
79+
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)}, labelValues...)...).Inc()
7580
}
7681
if result.CertificateExpiration != 0 {
77-
resultCertificateExpirationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.CertificateExpiration.Seconds())
82+
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
7883
}
7984
if result.Success {
80-
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(1)
85+
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
8186
} else {
82-
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(0)
87+
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(0)
8388
}
8489
}

metrics/metrics_test.go

+12-7
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,18 @@ import (
55
"testing"
66
"time"
77

8+
"github.com/TwiN/gatus/v5/config"
89
"github.com/TwiN/gatus/v5/config/endpoint"
910
"github.com/TwiN/gatus/v5/config/endpoint/dns"
1011
"github.com/prometheus/client_golang/prometheus"
1112
"github.com/prometheus/client_golang/prometheus/testutil"
1213
)
1314

1415
func TestPublishMetricsForEndpoint(t *testing.T) {
16+
InitializePrometheusMetrics(&config.Config{})
17+
1518
httpEndpoint := &endpoint.Endpoint{Name: "http-ep-name", Group: "http-ep-group", URL: "https://example.org"}
16-
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
19+
PublishMetricsForEndpoint([]string{}, httpEndpoint, &endpoint.Result{
1720
HTTPStatus: 200,
1821
Connected: true,
1922
Duration: 123 * time.Millisecond,
@@ -47,7 +50,7 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
4750
if err != nil {
4851
t.Errorf("Expected no errors but got: %v", err)
4952
}
50-
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
53+
PublishMetricsForEndpoint([]string{}, httpEndpoint, &endpoint.Result{
5154
HTTPStatus: 200,
5255
Connected: true,
5356
Duration: 125 * time.Millisecond,
@@ -82,11 +85,13 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
8285
if err != nil {
8386
t.Errorf("Expected no errors but got: %v", err)
8487
}
85-
dnsEndpoint := &endpoint.Endpoint{Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
86-
QueryType: "A",
87-
QueryName: "example.com.",
88-
}}
89-
PublishMetricsForEndpoint(dnsEndpoint, &endpoint.Result{
88+
dnsEndpoint := &endpoint.Endpoint{
89+
Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
90+
QueryType: "A",
91+
QueryName: "example.com.",
92+
},
93+
}
94+
PublishMetricsForEndpoint([]string{}, dnsEndpoint, &endpoint.Result{
9095
DNSRCode: "NOERROR",
9196
Connected: true,
9297
Duration: 50 * time.Millisecond,

0 commit comments

Comments
 (0)