Skip to content

Commit f5973c9

Browse files
authored
Merge pull request #19302 from ahrtr/metrics_20250129
Add e2e metrics test case to ensure no any metrics missing in future PRs
2 parents dd60559 + c4a424a commit f5973c9

File tree

3 files changed

+246
-1
lines changed

3 files changed

+246
-1
lines changed

tests/e2e/metrics_test.go

+241
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@
1515
package e2e
1616

1717
import (
18+
"bytes"
1819
"context"
1920
"fmt"
21+
"io"
22+
"net/http"
23+
"net/url"
2024
"testing"
2125
"time"
2226

27+
dto "github.com/prometheus/client_model/go"
28+
"github.com/prometheus/common/expfmt"
2329
"github.com/stretchr/testify/require"
2430

2531
"go.etcd.io/etcd/api/v3/version"
@@ -126,3 +132,238 @@ func expectLearnerMetric(cx ctlCtx, procIdx int, expectMetric string) {
126132
args := e2e.CURLPrefixArgsCluster(cx.epc.Cfg, cx.epc.Procs[procIdx], "GET", e2e.CURLReq{Endpoint: "/metrics"})
127133
require.NoError(cx.t, e2e.SpawnWithExpectsContext(ctx, args, nil, expect.ExpectedResponse{Value: expectMetric}))
128134
}
135+
136+
func TestNoMetricsMissing(t *testing.T) {
137+
var (
138+
// Note the list doesn't contain all the metrics, because the
139+
// labelled metrics won't be exposed by prometheus by default.
140+
// They are only exposed when at least one value with labels
141+
// is set.
142+
basicMetrics = []string{
143+
"etcd_cluster_version",
144+
"etcd_debugging_auth_revision",
145+
"etcd_debugging_disk_backend_commit_rebalance_duration_seconds",
146+
"etcd_debugging_disk_backend_commit_spill_duration_seconds",
147+
"etcd_debugging_disk_backend_commit_write_duration_seconds",
148+
"etcd_debugging_lease_granted_total",
149+
"etcd_debugging_lease_renewed_total",
150+
"etcd_debugging_lease_revoked_total",
151+
"etcd_debugging_lease_ttl_total",
152+
"etcd_debugging_mvcc_compact_revision",
153+
"etcd_debugging_mvcc_current_revision",
154+
"etcd_debugging_mvcc_db_compaction_keys_total",
155+
"etcd_debugging_mvcc_db_compaction_last",
156+
"etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds",
157+
"etcd_debugging_mvcc_db_compaction_total_duration_milliseconds",
158+
"etcd_debugging_mvcc_events_total",
159+
"etcd_debugging_mvcc_index_compaction_pause_duration_milliseconds",
160+
"etcd_debugging_mvcc_keys_total",
161+
"etcd_debugging_mvcc_pending_events_total",
162+
"etcd_debugging_mvcc_slow_watcher_total",
163+
"etcd_debugging_mvcc_total_put_size_in_bytes",
164+
"etcd_debugging_mvcc_watch_stream_total",
165+
"etcd_debugging_mvcc_watcher_total",
166+
"etcd_debugging_server_lease_expired_total",
167+
"etcd_debugging_snap_save_marshalling_duration_seconds",
168+
"etcd_debugging_snap_save_total_duration_seconds",
169+
"etcd_debugging_store_expires_total",
170+
"etcd_debugging_store_reads_total",
171+
"etcd_debugging_store_watch_requests_total",
172+
"etcd_debugging_store_watchers",
173+
"etcd_debugging_store_writes_total",
174+
"etcd_disk_backend_commit_duration_seconds",
175+
"etcd_disk_backend_defrag_duration_seconds",
176+
"etcd_disk_backend_snapshot_duration_seconds",
177+
"etcd_disk_defrag_inflight",
178+
"etcd_disk_wal_fsync_duration_seconds",
179+
"etcd_disk_wal_write_bytes_total",
180+
"etcd_disk_wal_write_duration_seconds",
181+
"etcd_grpc_proxy_cache_hits_total",
182+
"etcd_grpc_proxy_cache_keys_total",
183+
"etcd_grpc_proxy_cache_misses_total",
184+
"etcd_grpc_proxy_events_coalescing_total",
185+
"etcd_grpc_proxy_watchers_coalescing_total",
186+
"etcd_mvcc_db_open_read_transactions",
187+
"etcd_mvcc_db_total_size_in_bytes",
188+
"etcd_mvcc_db_total_size_in_use_in_bytes",
189+
"etcd_mvcc_delete_total",
190+
"etcd_mvcc_hash_duration_seconds",
191+
"etcd_mvcc_hash_rev_duration_seconds",
192+
"etcd_mvcc_put_total",
193+
"etcd_mvcc_range_total",
194+
"etcd_mvcc_txn_total",
195+
"etcd_network_client_grpc_received_bytes_total",
196+
"etcd_network_client_grpc_sent_bytes_total",
197+
"etcd_network_known_peers",
198+
"etcd_server_apply_duration_seconds",
199+
"etcd_server_client_requests_total",
200+
"etcd_server_go_version",
201+
"etcd_server_has_leader",
202+
"etcd_server_health_failures",
203+
"etcd_server_health_success",
204+
"etcd_server_heartbeat_send_failures_total",
205+
"etcd_server_id",
206+
"etcd_server_is_leader",
207+
"etcd_server_is_learner",
208+
"etcd_server_leader_changes_seen_total",
209+
"etcd_server_learner_promote_successes",
210+
"etcd_server_proposals_applied_total",
211+
"etcd_server_proposals_committed_total",
212+
"etcd_server_proposals_failed_total",
213+
"etcd_server_proposals_pending",
214+
"etcd_server_quota_backend_bytes",
215+
"etcd_server_range_duration_seconds",
216+
"etcd_server_read_indexes_failed_total",
217+
"etcd_server_slow_apply_total",
218+
"etcd_server_slow_read_indexes_total",
219+
"etcd_server_snapshot_apply_in_progress_total",
220+
"etcd_server_version",
221+
"etcd_snap_db_fsync_duration_seconds",
222+
"etcd_snap_db_save_total_duration_seconds",
223+
"etcd_snap_fsync_duration_seconds",
224+
"go_gc_duration_seconds",
225+
"go_gc_gogc_percent",
226+
"go_gc_gomemlimit_bytes",
227+
"go_goroutines",
228+
"go_info",
229+
"go_memstats_alloc_bytes",
230+
"go_memstats_alloc_bytes_total",
231+
"go_memstats_buck_hash_sys_bytes",
232+
"go_memstats_frees_total",
233+
"go_memstats_gc_sys_bytes",
234+
"go_memstats_heap_alloc_bytes",
235+
"go_memstats_heap_idle_bytes",
236+
"go_memstats_heap_inuse_bytes",
237+
"go_memstats_heap_objects",
238+
"go_memstats_heap_released_bytes",
239+
"go_memstats_heap_sys_bytes",
240+
"go_memstats_last_gc_time_seconds",
241+
"go_memstats_mallocs_total",
242+
"go_memstats_mcache_inuse_bytes",
243+
"go_memstats_mcache_sys_bytes",
244+
"go_memstats_mspan_inuse_bytes",
245+
"go_memstats_mspan_sys_bytes",
246+
"go_memstats_next_gc_bytes",
247+
"go_memstats_other_sys_bytes",
248+
"go_memstats_stack_inuse_bytes",
249+
"go_memstats_stack_sys_bytes",
250+
"go_memstats_sys_bytes",
251+
"go_sched_gomaxprocs_threads",
252+
"go_threads",
253+
"grpc_server_handled_total",
254+
"grpc_server_msg_received_total",
255+
"grpc_server_msg_sent_total",
256+
"grpc_server_started_total",
257+
"os_fd_limit",
258+
"os_fd_used",
259+
"promhttp_metric_handler_requests_in_flight",
260+
"promhttp_metric_handler_requests_total",
261+
}
262+
extraMultipleMemberClusterMetrics = []string{
263+
"etcd_network_active_peers",
264+
"etcd_network_peer_received_bytes_total",
265+
"etcd_network_peer_sent_bytes_total",
266+
}
267+
extraExtensiveMetrics = []string{"grpc_server_handling_seconds"}
268+
)
269+
270+
testCases := []struct {
271+
name string
272+
options []e2e.EPClusterOption
273+
expectedMetrics []string
274+
}{
275+
{
276+
name: "basic metrics of 1 member cluster",
277+
options: []e2e.EPClusterOption{
278+
e2e.WithClusterSize(1),
279+
},
280+
expectedMetrics: basicMetrics,
281+
},
282+
{
283+
name: "basic metrics of 3 member cluster",
284+
options: []e2e.EPClusterOption{
285+
e2e.WithClusterSize(3),
286+
},
287+
expectedMetrics: append(basicMetrics, extraMultipleMemberClusterMetrics...),
288+
},
289+
{
290+
name: "extensive metrics of 1 member cluster",
291+
options: []e2e.EPClusterOption{
292+
e2e.WithClusterSize(1),
293+
e2e.WithExtensiveMetrics(),
294+
},
295+
expectedMetrics: append(basicMetrics, extraExtensiveMetrics...),
296+
},
297+
{
298+
name: "extensive metrics of 3 member cluster",
299+
options: []e2e.EPClusterOption{
300+
e2e.WithClusterSize(3),
301+
e2e.WithExtensiveMetrics(),
302+
},
303+
expectedMetrics: append(append(basicMetrics, extraExtensiveMetrics...), extraMultipleMemberClusterMetrics...),
304+
},
305+
}
306+
307+
for _, tc := range testCases {
308+
t.Run(tc.name, func(t *testing.T) {
309+
e2e.BeforeTest(t)
310+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
311+
defer cancel()
312+
313+
epc, err := e2e.NewEtcdProcessCluster(ctx, t, tc.options...)
314+
require.NoError(t, err)
315+
defer epc.Close()
316+
317+
c := epc.Procs[0].Etcdctl()
318+
for i := 0; i < 3; i++ {
319+
err = c.Put(ctx, fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i), config.PutOptions{})
320+
require.NoError(t, err)
321+
}
322+
_, err = c.Get(ctx, "k", config.GetOptions{})
323+
require.NoError(t, err)
324+
325+
metricsURL, err := url.JoinPath(epc.Procs[0].Config().ClientURL, "metrics")
326+
require.NoError(t, err)
327+
328+
mfs, err := getMetrics(metricsURL)
329+
require.NoError(t, err)
330+
331+
var missingMetrics []string
332+
for _, metrics := range tc.expectedMetrics {
333+
if _, ok := mfs[metrics]; !ok {
334+
missingMetrics = append(missingMetrics, metrics)
335+
}
336+
}
337+
require.Emptyf(t, missingMetrics, "Some metrics are missing: %v", missingMetrics)
338+
339+
// Please keep the log below to generate the expected metrics.
340+
// t.Logf("All metrics: %v", formatMetrics(slices.Sorted(maps.Keys(mfs))))
341+
})
342+
}
343+
}
344+
345+
func getMetrics(metricsURL string) (map[string]*dto.MetricFamily, error) {
346+
httpClient := http.Client{Transport: &http.Transport{}}
347+
resp, err := httpClient.Get(metricsURL)
348+
if err != nil {
349+
return nil, err
350+
}
351+
352+
data, err := io.ReadAll(resp.Body)
353+
if err != nil {
354+
return nil, err
355+
}
356+
357+
var parser expfmt.TextParser
358+
return parser.TextToMetricFamilies(bytes.NewReader(data))
359+
}
360+
361+
// formatMetrics is only for test purpose
362+
/*func formatMetrics(metrics []string) string {
363+
quoted := make([]string, len(metrics))
364+
for i, s := range metrics {
365+
quoted[i] = fmt.Sprintf(`"%s",`, s)
366+
}
367+
368+
return fmt.Sprintf("[]string{\n%s\n}", strings.Join(quoted, "\n"))
369+
}*/

tests/framework/e2e/cluster.go

+4
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,10 @@ func WithCipherSuites(suites []string) EPClusterOption {
416416
return func(c *EtcdProcessClusterConfig) { c.ServerConfig.CipherSuites = suites }
417417
}
418418

419+
func WithExtensiveMetrics() EPClusterOption {
420+
return func(c *EtcdProcessClusterConfig) { c.ServerConfig.Metrics = "extensive" }
421+
}
422+
419423
// NewEtcdProcessCluster launches a new cluster from etcd processes, returning
420424
// a new EtcdProcessCluster once all nodes are ready to accept client requests.
421425
func NewEtcdProcessCluster(ctx context.Context, t testing.TB, opts ...EPClusterOption) (*EtcdProcessCluster, error) {

tests/go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ require (
2424
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
2525
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.0
2626
github.com/prometheus/client_golang v1.20.5
27+
github.com/prometheus/client_model v0.6.1
2728
github.com/prometheus/common v0.62.0
2829
github.com/soheilhy/cmux v0.1.5
2930
github.com/stretchr/testify v1.10.0
@@ -79,7 +80,6 @@ require (
7980
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
8081
github.com/olekukonko/tablewriter v0.0.5 // indirect
8182
github.com/pmezard/go-difflib v1.0.0 // indirect
82-
github.com/prometheus/client_model v0.6.1 // indirect
8383
github.com/prometheus/procfs v0.15.1 // indirect
8484
github.com/rivo/uniseg v0.4.7 // indirect
8585
github.com/sirupsen/logrus v1.9.3 // indirect

0 commit comments

Comments
 (0)