Skip to content

Commit 9c88db2

Browse files
authored
Merge pull request #276 from hashicorp/jm-member-list-size-metric
Emit metrics for local memberlist size and remote memberlist size
2 parents e892776 + e1b2314 commit 9c88db2

File tree

5 files changed

+69
-6
lines changed

5 files changed

+69
-6
lines changed

memberlist_test.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -259,13 +259,8 @@ func TestCreate_checkBroadcastQueueMetrics(t *testing.T) {
259259

260260
time.Sleep(3 * time.Second)
261261

262-
intv := getIntervalMetrics(t, sink)
263262
sampleName := "consul.usage.test.memberlist.queue.broadcasts"
264-
actualSample := intv.Samples[sampleName]
265-
266-
if actualSample.Count == 0 {
267-
t.Fatalf("%s sample not taken", sampleName)
268-
}
263+
verifySampleExists(t, sampleName, sink)
269264
}
270265

271266
func TestCreate_keyringOnly(t *testing.T) {

net.go

+21
Original file line numberDiff line numberDiff line change
@@ -1007,6 +1007,22 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool, streamLabel string
10071007
}
10081008
m.nodeLock.RUnlock()
10091009

1010+
nodeStateCounts := make(map[string]int)
1011+
nodeStateCounts[StateAlive.metricsString()] = 0
1012+
nodeStateCounts[StateLeft.metricsString()] = 0
1013+
nodeStateCounts[StateDead.metricsString()] = 0
1014+
nodeStateCounts[StateSuspect.metricsString()] = 0
1015+
1016+
for _, n := range localNodes {
1017+
nodeStateCounts[n.State.metricsString()]++
1018+
}
1019+
1020+
for nodeState, cnt := range nodeStateCounts {
1021+
metrics.SetGaugeWithLabels([]string{"memberlist", "node", "instances"},
1022+
float32(cnt),
1023+
append(m.metricLabels, metrics.Label{Name: "node_state", Value: nodeState}))
1024+
}
1025+
10101026
// Get the delegate state
10111027
var userData []byte
10121028
if m.config.Delegate != nil {
@@ -1042,6 +1058,9 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool, streamLabel string
10421058
}
10431059
}
10441060

1061+
moreBytes := binary.BigEndian.Uint32(bufConn.Bytes()[1:5])
1062+
metrics.SetGaugeWithLabels([]string{"memberlist", "size", "local"}, float32(moreBytes), m.metricLabels)
1063+
10451064
// Get the send buffer
10461065
return m.rawSendMsgStream(conn, bufConn.Bytes(), streamLabel)
10471066
}
@@ -1088,6 +1107,8 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader, streamLabel string) (
10881107
// Ensure we aren't asked to download too much. This is to guard against
10891108
// an attack vector where a huge amount of state is sent
10901109
moreBytes := binary.BigEndian.Uint32(cipherText.Bytes()[1:5])
1110+
metrics.AddSampleWithLabels([]string{"memberlist", "size", "remote"}, float32(moreBytes), m.metricLabels)
1111+
10911112
if moreBytes > maxPushStateBytes {
10921113
return nil, fmt.Errorf("Remote node state is larger than limit (%d)", moreBytes)
10931114

net_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ func TestEncryptDecryptState(t *testing.T) {
690690
SecretKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
691691
ProtocolVersion: ProtocolVersionMax,
692692
}
693+
sink := registerInMemorySink(t)
693694

694695
m, err := Create(config)
695696
if err != nil {
@@ -710,6 +711,7 @@ func TestEncryptDecryptState(t *testing.T) {
710711
if err != nil {
711712
t.Fatalf("err: %v", err)
712713
}
714+
verifySampleExists(t, "consul.usage.test.memberlist.size.remote", sink)
713715

714716
if !reflect.DeepEqual(state, plain) {
715717
t.Fatalf("Decrypt failed: %v", plain)

state.go

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,21 @@ import (
1515

1616
type NodeStateType int
1717

18+
func (t NodeStateType) metricsString() string {
19+
switch t {
20+
case StateAlive:
21+
return "alive"
22+
case StateDead:
23+
return "dead"
24+
case StateSuspect:
25+
return "suspect"
26+
case StateLeft:
27+
return "left"
28+
default:
29+
return fmt.Sprintf("unhandled-value-%d", t)
30+
}
31+
}
32+
1833
const (
1934
StateAlive NodeStateType = iota
2035
StateSuspect

state_test.go

+30
Original file line numberDiff line numberDiff line change
@@ -2239,6 +2239,8 @@ func TestMemberlist_PushPull(t *testing.T) {
22392239
ip1 := []byte(addr1)
22402240
ip2 := []byte(addr2)
22412241

2242+
sink := registerInMemorySink(t)
2243+
22422244
ch := make(chan NodeEvent, 3)
22432245

22442246
m1 := HostMemberlist(addr1.String(), t, func(c *Config) {
@@ -2270,6 +2272,13 @@ func TestMemberlist_PushPull(t *testing.T) {
22702272
if len(ch) < 2 {
22712273
failf("expected 2 messages from pushPull")
22722274
}
2275+
2276+
instancesMetricName := "consul.usage.test.memberlist.node.instances"
2277+
verifyGaugeExists(t, "consul.usage.test.memberlist.size.local", sink)
2278+
verifyGaugeExists(t, fmt.Sprintf("%s;node_state=%s", instancesMetricName, StateAlive.metricsString()), sink)
2279+
verifyGaugeExists(t, fmt.Sprintf("%s;node_state=%s", instancesMetricName, StateDead.metricsString()), sink)
2280+
verifyGaugeExists(t, fmt.Sprintf("%s;node_state=%s", instancesMetricName, StateLeft.metricsString()), sink)
2281+
verifyGaugeExists(t, fmt.Sprintf("%s;node_state=%s", instancesMetricName, StateSuspect.metricsString()), sink)
22732282
})
22742283
}
22752284

@@ -2412,3 +2421,24 @@ func getIntervalMetrics(t *testing.T, sink *metrics.InmemSink) *metrics.Interval
24122421
intv := intervals[0]
24132422
return intv
24142423
}
2424+
2425+
func verifyGaugeExists(t *testing.T, name string, sink *metrics.InmemSink) {
2426+
t.Helper()
2427+
interval := getIntervalMetrics(t, sink)
2428+
interval.RLock()
2429+
defer interval.RUnlock()
2430+
if _, ok := interval.Gauges[name]; !ok {
2431+
t.Fatalf("%s gauge not emmited", name)
2432+
}
2433+
}
2434+
2435+
func verifySampleExists(t *testing.T, name string, sink *metrics.InmemSink) {
2436+
t.Helper()
2437+
interval := getIntervalMetrics(t, sink)
2438+
interval.RLock()
2439+
defer interval.RUnlock()
2440+
2441+
if _, ok := interval.Samples[name]; !ok {
2442+
t.Fatalf("%s sample not emmited", name)
2443+
}
2444+
}

0 commit comments

Comments
 (0)