Skip to content

Commit d98d2f6

Browse files
authored
Refactor tests for slm collector (#928)
- Remove up, totalScrapes, and jsonParseFailures metrics. They are not useful.
- Move fixtures to individual files
- Base tests on the metric output for better testing the expected output instead of the internals.

Signed-off-by: Joe Adams <[email protected]>
1 parent 711a6ce commit d98d2f6

File tree

4 files changed

+120
-59
lines changed

4 files changed

+120
-59
lines changed

collector/slm.go

-33
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@ type SLM struct {
6161
client *http.Client
6262
url *url.URL
6363

64-
up prometheus.Gauge
65-
totalScrapes, jsonParseFailures prometheus.Counter
66-
6764
slmMetrics []*slmMetric
6865
policyMetrics []*policyMetric
6966
slmStatusMetric *slmStatusMetric
@@ -75,19 +72,6 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM {
7572
logger: logger,
7673
client: client,
7774
url: url,
78-
79-
up: prometheus.NewGauge(prometheus.GaugeOpts{
80-
Name: prometheus.BuildFQName(namespace, "slm_stats", "up"),
81-
Help: "Was the last scrape of the Elasticsearch SLM endpoint successful.",
82-
}),
83-
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
84-
Name: prometheus.BuildFQName(namespace, "slm_stats", "total_scrapes"),
85-
Help: "Current total Elasticsearch SLM scrapes.",
86-
}),
87-
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
88-
Name: prometheus.BuildFQName(namespace, "slm_stats", "json_parse_failures"),
89-
Help: "Number of errors while parsing JSON.",
90-
}),
9175
slmMetrics: []*slmMetric{
9276
{
9377
Type: prometheus.CounterValue,
@@ -257,9 +241,6 @@ func (s *SLM) Describe(ch chan<- *prometheus.Desc) {
257241
ch <- metric.Desc
258242
}
259243

260-
ch <- s.up.Desc()
261-
ch <- s.totalScrapes.Desc()
262-
ch <- s.jsonParseFailures.Desc()
263244
}
264245

265246
func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) {
@@ -289,12 +270,10 @@ func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) {
289270

290271
bts, err := io.ReadAll(res.Body)
291272
if err != nil {
292-
s.jsonParseFailures.Inc()
293273
return ssr, err
294274
}
295275

296276
if err := json.Unmarshal(bts, &ssr); err != nil {
297-
s.jsonParseFailures.Inc()
298277
return ssr, err
299278
}
300279

@@ -328,12 +307,10 @@ func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) {
328307

329308
bts, err := io.ReadAll(res.Body)
330309
if err != nil {
331-
s.jsonParseFailures.Inc()
332310
return ssr, err
333311
}
334312

335313
if err := json.Unmarshal(bts, &ssr); err != nil {
336-
s.jsonParseFailures.Inc()
337314
return ssr, err
338315
}
339316

@@ -342,16 +319,9 @@ func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) {
342319

343320
// Collect gets SLM metric values
344321
func (s *SLM) Collect(ch chan<- prometheus.Metric) {
345-
s.totalScrapes.Inc()
346-
defer func() {
347-
ch <- s.up
348-
ch <- s.totalScrapes
349-
ch <- s.jsonParseFailures
350-
}()
351322

352323
slmStatusResp, err := s.fetchAndDecodeSLMStatus()
353324
if err != nil {
354-
s.up.Set(0)
355325
level.Warn(s.logger).Log(
356326
"msg", "failed to fetch and decode slm status",
357327
"err", err,
@@ -361,16 +331,13 @@ func (s *SLM) Collect(ch chan<- prometheus.Metric) {
361331

362332
slmStatsResp, err := s.fetchAndDecodeSLMStats()
363333
if err != nil {
364-
s.up.Set(0)
365334
level.Warn(s.logger).Log(
366335
"msg", "failed to fetch and decode slm stats",
367336
"err", err,
368337
)
369338
return
370339
}
371340

372-
s.up.Set(1)
373-
374341
for _, status := range statuses {
375342
ch <- prometheus.MustNewConstMetric(
376343
s.slmStatusMetric.Desc,

collector/slm_test.go

+99-26
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@
1414
package collector
1515

1616
import (
17-
"fmt"
17+
"io"
1818
"net/http"
1919
"net/http/httptest"
2020
"net/url"
21+
"os"
22+
"path"
23+
"strings"
2124
"testing"
2225

2326
"github.com/go-kit/log"
27+
"github.com/prometheus/client_golang/prometheus/testutil"
2428
)
2529

2630
func TestSLM(t *testing.T) {
@@ -31,35 +35,104 @@ func TestSLM(t *testing.T) {
3135
// curl -XPUT http://127.0.0.1:9200/_slm/policy/everything -H 'Content-Type: application/json' -d '{"schedule":"0 */15 * * * ?","name":"<everything-{now/d}>","repository":"my_repository","config":{"indices":".*","include_global_state":true,"ignore_unavailable":true},"retention":{"expire_after":"7d"}}'
3236
// curl http://127.0.0.1:9200/_slm/stats (Numbers manually tweaked)
3337

34-
tcs := map[string]string{
35-
"7.15.0": `{"retention_runs":9,"retention_failed":0,"retention_timed_out":0,"retention_deletion_time":"1.2m","retention_deletion_time_millis":72491,"total_snapshots_taken":103,"total_snapshots_failed":2,"total_snapshots_deleted":20,"total_snapshot_deletion_failures":0,"policy_stats":[{"policy":"everything","snapshots_taken":50,"snapshots_failed":2,"snapshots_deleted":20,"snapshot_deletion_failures":0}]}`,
38+
tests := []struct {
39+
name string
40+
file string
41+
want string
42+
}{
43+
{
44+
name: "7.15.0",
45+
file: "7.15.0.json",
46+
want: `# HELP elasticsearch_slm_stats_operation_mode Operating status of SLM
47+
# TYPE elasticsearch_slm_stats_operation_mode gauge
48+
elasticsearch_slm_stats_operation_mode{operation_mode="RUNNING"} 0
49+
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPED"} 0
50+
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPING"} 0
51+
# HELP elasticsearch_slm_stats_retention_deletion_time_seconds Retention run deletion time
52+
# TYPE elasticsearch_slm_stats_retention_deletion_time_seconds gauge
53+
elasticsearch_slm_stats_retention_deletion_time_seconds 72.491
54+
# HELP elasticsearch_slm_stats_retention_failed_total Total failed retention runs
55+
# TYPE elasticsearch_slm_stats_retention_failed_total counter
56+
elasticsearch_slm_stats_retention_failed_total 0
57+
# HELP elasticsearch_slm_stats_retention_runs_total Total retention runs
58+
# TYPE elasticsearch_slm_stats_retention_runs_total counter
59+
elasticsearch_slm_stats_retention_runs_total 9
60+
# HELP elasticsearch_slm_stats_retention_timed_out_total Total timed out retention runs
61+
# TYPE elasticsearch_slm_stats_retention_timed_out_total counter
62+
elasticsearch_slm_stats_retention_timed_out_total 0
63+
# HELP elasticsearch_slm_stats_snapshot_deletion_failures_total Total snapshot deletion failures
64+
# TYPE elasticsearch_slm_stats_snapshot_deletion_failures_total counter
65+
elasticsearch_slm_stats_snapshot_deletion_failures_total{policy="everything"} 0
66+
# HELP elasticsearch_slm_stats_snapshots_deleted_total Total snapshots deleted
67+
# TYPE elasticsearch_slm_stats_snapshots_deleted_total counter
68+
elasticsearch_slm_stats_snapshots_deleted_total{policy="everything"} 20
69+
# HELP elasticsearch_slm_stats_snapshots_failed_total Total snapshots failed
70+
# TYPE elasticsearch_slm_stats_snapshots_failed_total counter
71+
elasticsearch_slm_stats_snapshots_failed_total{policy="everything"} 2
72+
# HELP elasticsearch_slm_stats_snapshots_taken_total Total snapshots taken
73+
# TYPE elasticsearch_slm_stats_snapshots_taken_total counter
74+
elasticsearch_slm_stats_snapshots_taken_total{policy="everything"} 50
75+
# HELP elasticsearch_slm_stats_total_snapshot_deletion_failures_total Total snapshot deletion failures
76+
# TYPE elasticsearch_slm_stats_total_snapshot_deletion_failures_total counter
77+
elasticsearch_slm_stats_total_snapshot_deletion_failures_total 0
78+
# HELP elasticsearch_slm_stats_total_snapshots_deleted_total Total snapshots deleted
79+
# TYPE elasticsearch_slm_stats_total_snapshots_deleted_total counter
80+
elasticsearch_slm_stats_total_snapshots_deleted_total 20
81+
# HELP elasticsearch_slm_stats_total_snapshots_failed_total Total snapshots failed
82+
# TYPE elasticsearch_slm_stats_total_snapshots_failed_total counter
83+
elasticsearch_slm_stats_total_snapshots_failed_total 2
84+
# HELP elasticsearch_slm_stats_total_snapshots_taken_total Total snapshots taken
85+
# TYPE elasticsearch_slm_stats_total_snapshots_taken_total counter
86+
elasticsearch_slm_stats_total_snapshots_taken_total 103
87+
`,
88+
},
3689
}
37-
for ver, out := range tcs {
38-
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
39-
fmt.Fprintln(w, out)
40-
}))
41-
defer ts.Close()
4290

43-
u, err := url.Parse(ts.URL)
44-
if err != nil {
45-
t.Fatalf("Failed to parse URL: %s", err)
46-
}
47-
s := NewSLM(log.NewNopLogger(), http.DefaultClient, u)
48-
stats, err := s.fetchAndDecodeSLMStats()
49-
if err != nil {
50-
t.Fatalf("Failed to fetch or decode snapshots stats: %s", err)
51-
}
52-
t.Logf("[%s] SLM Response: %+v", ver, stats)
53-
slmStats := stats
54-
policyStats := stats.PolicyStats[0]
91+
for _, tt := range tests {
92+
t.Run(tt.name, func(t *testing.T) {
93+
fStatsPath := path.Join("../fixtures/slm/stats/", tt.file)
94+
fStats, err := os.Open(fStatsPath)
95+
if err != nil {
96+
t.Fatal(err)
97+
}
98+
defer fStats.Close()
5599

56-
if slmStats.TotalSnapshotsTaken != 103 {
57-
t.Errorf("Bad number of total snapshots taken")
58-
}
100+
fStatusPath := path.Join("../fixtures/slm/status/", tt.file)
101+
fStatus, err := os.Open(fStatusPath)
102+
if err != nil {
103+
t.Fatal(err)
104+
}
105+
defer fStatus.Close()
106+
107+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
108+
switch r.RequestURI {
109+
case "/_slm/stats":
110+
io.Copy(w, fStats)
111+
return
112+
case "/_slm/status":
113+
io.Copy(w, fStatus)
114+
return
115+
}
116+
117+
http.Error(w, "Not Found", http.StatusNotFound)
118+
}))
119+
defer ts.Close()
120+
121+
u, err := url.Parse(ts.URL)
122+
if err != nil {
123+
t.Fatalf("Failed to parse URL: %s", err)
124+
}
125+
126+
s := NewSLM(log.NewNopLogger(), http.DefaultClient, u)
127+
if err != nil {
128+
t.Fatal(err)
129+
}
130+
131+
if err := testutil.CollectAndCompare(s, strings.NewReader(tt.want)); err != nil {
132+
t.Fatal(err)
133+
}
134+
})
59135

60-
if policyStats.SnapshotsTaken != 50 {
61-
t.Errorf("Bad number of policy snapshots taken")
62-
}
63136
}
64137

65138
}

fixtures/slm/stats/7.15.0.json

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"retention_runs": 9,
3+
"retention_failed": 0,
4+
"retention_timed_out": 0,
5+
"retention_deletion_time": "1.2m",
6+
"retention_deletion_time_millis": 72491,
7+
"total_snapshots_taken": 103,
8+
"total_snapshots_failed": 2,
9+
"total_snapshots_deleted": 20,
10+
"total_snapshot_deletion_failures": 0,
11+
"policy_stats": [
12+
{
13+
"policy": "everything",
14+
"snapshots_taken": 50,
15+
"snapshots_failed": 2,
16+
"snapshots_deleted": 20,
17+
"snapshot_deletion_failures": 0
18+
}
19+
]
20+
}

fixtures/slm/status/7.15.0.json

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

0 commit comments

Comments
 (0)