Skip to content

Refactor tests for slm collector #928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions collector/slm.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ type SLM struct {
client *http.Client
url *url.URL

up prometheus.Gauge
totalScrapes, jsonParseFailures prometheus.Counter

slmMetrics []*slmMetric
policyMetrics []*policyMetric
slmStatusMetric *slmStatusMetric
Expand All @@ -75,19 +72,6 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM {
logger: logger,
client: client,
url: url,

up: prometheus.NewGauge(prometheus.GaugeOpts{
Name: prometheus.BuildFQName(namespace, "slm_stats", "up"),
Help: "Was the last scrape of the Elasticsearch SLM endpoint successful.",
}),
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
Name: prometheus.BuildFQName(namespace, "slm_stats", "total_scrapes"),
Help: "Current total Elasticsearch SLM scrapes.",
}),
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
Name: prometheus.BuildFQName(namespace, "slm_stats", "json_parse_failures"),
Help: "Number of errors while parsing JSON.",
}),
slmMetrics: []*slmMetric{
{
Type: prometheus.CounterValue,
Expand Down Expand Up @@ -257,9 +241,6 @@ func (s *SLM) Describe(ch chan<- *prometheus.Desc) {
ch <- metric.Desc
}

ch <- s.up.Desc()
ch <- s.totalScrapes.Desc()
ch <- s.jsonParseFailures.Desc()
}

func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) {
Expand Down Expand Up @@ -289,12 +270,10 @@ func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) {

bts, err := io.ReadAll(res.Body)
if err != nil {
s.jsonParseFailures.Inc()
return ssr, err
}

if err := json.Unmarshal(bts, &ssr); err != nil {
s.jsonParseFailures.Inc()
return ssr, err
}

Expand Down Expand Up @@ -328,12 +307,10 @@ func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) {

bts, err := io.ReadAll(res.Body)
if err != nil {
s.jsonParseFailures.Inc()
return ssr, err
}

if err := json.Unmarshal(bts, &ssr); err != nil {
s.jsonParseFailures.Inc()
return ssr, err
}

Expand All @@ -342,16 +319,9 @@ func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) {

// Collect gets SLM metric values
func (s *SLM) Collect(ch chan<- prometheus.Metric) {
s.totalScrapes.Inc()
defer func() {
ch <- s.up
ch <- s.totalScrapes
ch <- s.jsonParseFailures
}()

slmStatusResp, err := s.fetchAndDecodeSLMStatus()
if err != nil {
s.up.Set(0)
level.Warn(s.logger).Log(
"msg", "failed to fetch and decode slm status",
"err", err,
Expand All @@ -361,16 +331,13 @@ func (s *SLM) Collect(ch chan<- prometheus.Metric) {

slmStatsResp, err := s.fetchAndDecodeSLMStats()
if err != nil {
s.up.Set(0)
level.Warn(s.logger).Log(
"msg", "failed to fetch and decode slm stats",
"err", err,
)
return
}

s.up.Set(1)

for _, status := range statuses {
ch <- prometheus.MustNewConstMetric(
s.slmStatusMetric.Desc,
Expand Down
125 changes: 99 additions & 26 deletions collector/slm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@
package collector

import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path"
"strings"
"testing"

"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestSLM(t *testing.T) {
Expand All @@ -31,35 +35,104 @@ func TestSLM(t *testing.T) {
// curl -XPUT http://127.0.0.1:9200/_slm/policy/everything -H 'Content-Type: application/json' -d '{"schedule":"0 */15 * * * ?","name":"<everything-{now/d}>","repository":"my_repository","config":{"indices":".*","include_global_state":true,"ignore_unavailable":true},"retention":{"expire_after":"7d"}}'
// curl http://127.0.0.1:9200/_slm/stats (Numbers manually tweaked)

tcs := map[string]string{
"7.15.0": `{"retention_runs":9,"retention_failed":0,"retention_timed_out":0,"retention_deletion_time":"1.2m","retention_deletion_time_millis":72491,"total_snapshots_taken":103,"total_snapshots_failed":2,"total_snapshots_deleted":20,"total_snapshot_deletion_failures":0,"policy_stats":[{"policy":"everything","snapshots_taken":50,"snapshots_failed":2,"snapshots_deleted":20,"snapshot_deletion_failures":0}]}`,
tests := []struct {
name string
file string
want string
}{
{
name: "7.15.0",
file: "7.15.0.json",
want: `# HELP elasticsearch_slm_stats_operation_mode Operating status of SLM
# TYPE elasticsearch_slm_stats_operation_mode gauge
elasticsearch_slm_stats_operation_mode{operation_mode="RUNNING"} 0
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPED"} 0
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPING"} 0
# HELP elasticsearch_slm_stats_retention_deletion_time_seconds Retention run deletion time
# TYPE elasticsearch_slm_stats_retention_deletion_time_seconds gauge
elasticsearch_slm_stats_retention_deletion_time_seconds 72.491
# HELP elasticsearch_slm_stats_retention_failed_total Total failed retention runs
# TYPE elasticsearch_slm_stats_retention_failed_total counter
elasticsearch_slm_stats_retention_failed_total 0
# HELP elasticsearch_slm_stats_retention_runs_total Total retention runs
# TYPE elasticsearch_slm_stats_retention_runs_total counter
elasticsearch_slm_stats_retention_runs_total 9
# HELP elasticsearch_slm_stats_retention_timed_out_total Total timed out retention runs
# TYPE elasticsearch_slm_stats_retention_timed_out_total counter
elasticsearch_slm_stats_retention_timed_out_total 0
# HELP elasticsearch_slm_stats_snapshot_deletion_failures_total Total snapshot deletion failures
# TYPE elasticsearch_slm_stats_snapshot_deletion_failures_total counter
elasticsearch_slm_stats_snapshot_deletion_failures_total{policy="everything"} 0
# HELP elasticsearch_slm_stats_snapshots_deleted_total Total snapshots deleted
# TYPE elasticsearch_slm_stats_snapshots_deleted_total counter
elasticsearch_slm_stats_snapshots_deleted_total{policy="everything"} 20
# HELP elasticsearch_slm_stats_snapshots_failed_total Total snapshots failed
# TYPE elasticsearch_slm_stats_snapshots_failed_total counter
elasticsearch_slm_stats_snapshots_failed_total{policy="everything"} 2
# HELP elasticsearch_slm_stats_snapshots_taken_total Total snapshots taken
# TYPE elasticsearch_slm_stats_snapshots_taken_total counter
elasticsearch_slm_stats_snapshots_taken_total{policy="everything"} 50
# HELP elasticsearch_slm_stats_total_snapshot_deletion_failures_total Total snapshot deletion failures
# TYPE elasticsearch_slm_stats_total_snapshot_deletion_failures_total counter
elasticsearch_slm_stats_total_snapshot_deletion_failures_total 0
# HELP elasticsearch_slm_stats_total_snapshots_deleted_total Total snapshots deleted
# TYPE elasticsearch_slm_stats_total_snapshots_deleted_total counter
elasticsearch_slm_stats_total_snapshots_deleted_total 20
# HELP elasticsearch_slm_stats_total_snapshots_failed_total Total snapshots failed
# TYPE elasticsearch_slm_stats_total_snapshots_failed_total counter
elasticsearch_slm_stats_total_snapshots_failed_total 2
# HELP elasticsearch_slm_stats_total_snapshots_taken_total Total snapshots taken
# TYPE elasticsearch_slm_stats_total_snapshots_taken_total counter
elasticsearch_slm_stats_total_snapshots_taken_total 103
`,
},
}
for ver, out := range tcs {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, out)
}))
defer ts.Close()

u, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("Failed to parse URL: %s", err)
}
s := NewSLM(log.NewNopLogger(), http.DefaultClient, u)
stats, err := s.fetchAndDecodeSLMStats()
if err != nil {
t.Fatalf("Failed to fetch or decode snapshots stats: %s", err)
}
t.Logf("[%s] SLM Response: %+v", ver, stats)
slmStats := stats
policyStats := stats.PolicyStats[0]
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fStatsPath := path.Join("../fixtures/slm/stats/", tt.file)
fStats, err := os.Open(fStatsPath)
if err != nil {
t.Fatal(err)
}
defer fStats.Close()

if slmStats.TotalSnapshotsTaken != 103 {
t.Errorf("Bad number of total snapshots taken")
}
fStatusPath := path.Join("../fixtures/slm/status/", tt.file)
fStatus, err := os.Open(fStatusPath)
if err != nil {
t.Fatal(err)
}
defer fStatus.Close()

ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.RequestURI {
case "/_slm/stats":
io.Copy(w, fStats)
return
case "/_slm/status":
io.Copy(w, fStatus)
return
}

http.Error(w, "Not Found", http.StatusNotFound)
}))
defer ts.Close()

u, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("Failed to parse URL: %s", err)
}

s := NewSLM(log.NewNopLogger(), http.DefaultClient, u)
if err != nil {
t.Fatal(err)
}

if err := testutil.CollectAndCompare(s, strings.NewReader(tt.want)); err != nil {
t.Fatal(err)
}
})

if policyStats.SnapshotsTaken != 50 {
t.Errorf("Bad number of policy snapshots taken")
}
}

}
20 changes: 20 additions & 0 deletions fixtures/slm/stats/7.15.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"retention_runs": 9,
"retention_failed": 0,
"retention_timed_out": 0,
"retention_deletion_time": "1.2m",
"retention_deletion_time_millis": 72491,
"total_snapshots_taken": 103,
"total_snapshots_failed": 2,
"total_snapshots_deleted": 20,
"total_snapshot_deletion_failures": 0,
"policy_stats": [
{
"policy": "everything",
"snapshots_taken": 50,
"snapshots_failed": 2,
"snapshots_deleted": 20,
"snapshot_deletion_failures": 0
}
]
}
1 change: 1 addition & 0 deletions fixtures/slm/status/7.15.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}