Skip to content

Commit e6ff9b2

Browse files
huikang and rboyer authored
Add metrics labels to all metrics (#270)
* adding metric labels to all metrics Co-authored-by: R.B. Boyer <[email protected]>
1 parent 05405d0 commit e6ff9b2

7 files changed

+48
-26
lines changed

awareness.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,17 @@ type awareness struct {
2121
// score is the current awareness score. Lower values are healthier and
2222
// zero is the minimum value.
2323
score int
24+
25+
// metricLabels is the slice of labels to put on all emitted metrics
26+
metricLabels []metrics.Label
2427
}
2528

2629
// newAwareness returns a new awareness object.
27-
func newAwareness(max int) *awareness {
30+
func newAwareness(max int, metricLabels []metrics.Label) *awareness {
2831
return &awareness{
29-
max: max,
30-
score: 0,
32+
max: max,
33+
score: 0,
34+
metricLabels: metricLabels,
3135
}
3236
}
3337

@@ -47,7 +51,7 @@ func (a *awareness) ApplyDelta(delta int) {
4751
a.Unlock()
4852

4953
if initial != final {
50-
metrics.SetGauge([]string{"memberlist", "health", "score"}, float32(final))
54+
metrics.SetGaugeWithLabels([]string{"memberlist", "health", "score"}, float32(final), a.metricLabels)
5155
}
5256
}
5357

awareness_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ func TestAwareness(t *testing.T) {
2727
{-1, 0, 1 * time.Second},
2828
}
2929

30-
a := newAwareness(8)
30+
a := newAwareness(8, nil)
3131
for i, c := range cases {
3232
a.ApplyDelta(c.delta)
3333
if a.GetHealthScore() != c.score {

config.go

+5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010
"time"
1111

12+
"github.com/armon/go-metrics"
1213
multierror "github.com/hashicorp/go-multierror"
1314
)
1415

@@ -244,10 +245,14 @@ type Config struct {
244245
// RequireNodeNames controls if the name of a node is required when sending
245246
// a message to that node.
246247
RequireNodeNames bool
248+
247249
// CIDRsAllowed: if nil, allow any connection (default); otherwise specify all networks
248250
// allowed to connect (you must specify IPv6/IPv4 separately)
249251
// Using [] will block all connections.
250252
CIDRsAllowed []net.IPNet
253+
254+
// MetricLabels is a slice of optional labels to apply to all metrics emitted.
255+
MetricLabels []metrics.Label
251256
}
252257

253258
// ParseCIDRs returns a possibly empty list of all networks that have been parsed

memberlist.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"sync/atomic"
2828
"time"
2929

30+
"github.com/armon/go-metrics"
3031
multierror "github.com/hashicorp/go-multierror"
3132
sockaddr "github.com/hashicorp/go-sockaddr"
3233
"github.com/miekg/dns"
@@ -77,6 +78,9 @@ type Memberlist struct {
7778
broadcasts *TransmitLimitedQueue
7879

7980
logger *log.Logger
81+
82+
// metricLabels is the slice of labels to put on all emitted metrics
83+
metricLabels []metrics.Label
8084
}
8185

8286
// BuildVsnArray creates the array of Vsn
@@ -135,9 +139,10 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
135139
transport := conf.Transport
136140
if transport == nil {
137141
nc := &NetTransportConfig{
138-
BindAddrs: []string{conf.BindAddr},
139-
BindPort: conf.BindPort,
140-
Logger: logger,
142+
BindAddrs: []string{conf.BindAddr},
143+
BindPort: conf.BindPort,
144+
Logger: logger,
145+
MetricLabels: conf.MetricLabels,
141146
}
142147

143148
// See comment below for details about the retry in here.
@@ -208,10 +213,11 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
208213
lowPriorityMsgQueue: list.New(),
209214
nodeMap: make(map[string]*nodeState),
210215
nodeTimers: make(map[string]*suspicion),
211-
awareness: newAwareness(conf.AwarenessMaxMultiplier),
216+
awareness: newAwareness(conf.AwarenessMaxMultiplier, conf.MetricLabels),
212217
ackHandlers: make(map[uint32]*ackHandler),
213218
broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
214219
logger: logger,
220+
metricLabels: conf.MetricLabels,
215221
}
216222
m.broadcasts.NumNodes = func() int {
217223
return m.estNumNodes()

net.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ func (m *Memberlist) handleConn(conn net.Conn) {
234234
defer conn.Close()
235235
m.logger.Printf("[DEBUG] memberlist: Stream connection %s", LogConn(conn))
236236

237-
metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1)
237+
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "accept"}, 1, m.metricLabels)
238238

239239
conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
240240

@@ -869,7 +869,7 @@ func (m *Memberlist) rawSendMsgPacket(a Address, node *Node, msg []byte) error {
869869
msg = buf.Bytes()
870870
}
871871

872-
metrics.IncrCounter([]string{"memberlist", "udp", "sent"}, float32(len(msg)))
872+
metrics.IncrCounterWithLabels([]string{"memberlist", "udp", "sent"}, float32(len(msg)), m.metricLabels)
873873
_, err := m.transport.WriteToAddress(msg, a)
874874
return err
875875
}
@@ -898,7 +898,7 @@ func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte, streamLabel
898898
}
899899

900900
// Write out the entire send buffer
901-
metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)))
901+
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)), m.metricLabels)
902902

903903
if n, err := conn.Write(sendBuf); err != nil {
904904
return err
@@ -953,7 +953,7 @@ func (m *Memberlist) sendAndReceiveState(a Address, join bool) ([]pushNodeState,
953953
}
954954
defer conn.Close()
955955
m.logger.Printf("[DEBUG] memberlist: Initiating push/pull sync with: %s %s", a.Name, conn.RemoteAddr())
956-
metrics.IncrCounter([]string{"memberlist", "tcp", "connect"}, 1)
956+
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "connect"}, 1, m.metricLabels)
957957

958958
// Send our state
959959
if err := m.sendLocalState(conn, join, m.config.Label); err != nil {

net_transport.go

+12-5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ type NetTransportConfig struct {
3535

3636
// Logger is a logger for operator messages.
3737
Logger *log.Logger
38+
39+
// MetricLabels is a slice of optional labels to apply to all metrics
40+
// emitted by this transport.
41+
MetricLabels []metrics.Label
3842
}
3943

4044
// NetTransport is a Transport implementation that uses connectionless UDP for
@@ -48,6 +52,8 @@ type NetTransport struct {
4852
tcpListeners []*net.TCPListener
4953
udpListeners []*net.UDPConn
5054
shutdown int32
55+
56+
metricLabels []metrics.Label
5157
}
5258

5359
var _ NodeAwareTransport = (*NetTransport)(nil)
@@ -64,10 +70,11 @@ func NewNetTransport(config *NetTransportConfig) (*NetTransport, error) {
6470
// Build out the new transport.
6571
var ok bool
6672
t := NetTransport{
67-
config: config,
68-
packetCh: make(chan *Packet),
69-
streamCh: make(chan net.Conn),
70-
logger: config.Logger,
73+
config: config,
74+
packetCh: make(chan *Packet),
75+
streamCh: make(chan net.Conn),
76+
logger: config.Logger,
77+
metricLabels: config.MetricLabels,
7178
}
7279

7380
// Clean up listeners if there's an error.
@@ -341,7 +348,7 @@ func (t *NetTransport) udpListen(udpLn *net.UDPConn) {
341348
}
342349

343350
// Ingest the packet.
344-
metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n))
351+
metrics.IncrCounterWithLabels([]string{"memberlist", "udp", "received"}, float32(n), t.metricLabels)
345352
t.packetCh <- &Packet{
346353
Buf: buf[:n],
347354
From: addr,

state.go

+8-8
Original file line numberDiff line numberDiff line change
@@ -286,14 +286,14 @@ func failedRemote(err error) bool {
286286

287287
// probeNode handles a single round of failure checking on a node.
288288
func (m *Memberlist) probeNode(node *nodeState) {
289-
defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now())
289+
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "probeNode"}, time.Now(), m.metricLabels)
290290

291291
// We use our health awareness to scale the overall probe interval, so we
292292
// slow down if we detect problems. The ticker that calls us can handle
293293
// us running over the base interval, and will skip missed ticks.
294294
probeInterval := m.awareness.ScaleTimeout(m.config.ProbeInterval)
295295
if probeInterval > m.config.ProbeInterval {
296-
metrics.IncrCounter([]string{"memberlist", "degraded", "probe"}, 1)
296+
metrics.IncrCounterWithLabels([]string{"memberlist", "degraded", "probe"}, 1, m.metricLabels)
297297
}
298298

299299
// Prepare a ping message and setup an ack handler.
@@ -573,7 +573,7 @@ func (m *Memberlist) resetNodes() {
573573
// gossip is invoked every GossipInterval period to broadcast our gossip
574574
// messages to a few random nodes.
575575
func (m *Memberlist) gossip() {
576-
defer metrics.MeasureSince([]string{"memberlist", "gossip"}, time.Now())
576+
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "gossip"}, time.Now(), m.metricLabels)
577577

578578
// Get some random live, suspect, or recently dead nodes
579579
m.nodeLock.RLock()
@@ -653,7 +653,7 @@ func (m *Memberlist) pushPull() {
653653

654654
// pushPullNode does a complete state exchange with a specific node.
655655
func (m *Memberlist) pushPullNode(a Address, join bool) error {
656-
defer metrics.MeasureSince([]string{"memberlist", "pushPullNode"}, time.Now())
656+
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "pushPullNode"}, time.Now(), m.metricLabels)
657657

658658
// Attempt to send and receive with the node
659659
remote, userState, err := m.sendAndReceiveState(a, join)
@@ -1125,7 +1125,7 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
11251125
}
11261126

11271127
// Update metrics
1128-
metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1)
1128+
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "alive"}, 1, m.metricLabels)
11291129

11301130
// Notify the delegate of any relevant updates
11311131
if m.config.Events != nil {
@@ -1183,7 +1183,7 @@ func (m *Memberlist) suspectNode(s *suspect) {
11831183
}
11841184

11851185
// Update metrics
1186-
metrics.IncrCounter([]string{"memberlist", "msg", "suspect"}, 1)
1186+
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "suspect"}, 1, m.metricLabels)
11871187

11881188
// Update the state
11891189
state.Incarnation = s.Incarnation
@@ -1221,7 +1221,7 @@ func (m *Memberlist) suspectNode(s *suspect) {
12211221

12221222
if timeout {
12231223
if k > 0 && numConfirmations < k {
1224-
metrics.IncrCounter([]string{"memberlist", "degraded", "timeout"}, 1)
1224+
metrics.IncrCounterWithLabels([]string{"memberlist", "degraded", "timeout"}, 1, m.metricLabels)
12251225
}
12261226

12271227
m.logger.Printf("[INFO] memberlist: Marking %s as failed, suspect timeout reached (%d peer confirmations)",
@@ -1274,7 +1274,7 @@ func (m *Memberlist) deadNode(d *dead) {
12741274
}
12751275

12761276
// Update metrics
1277-
metrics.IncrCounter([]string{"memberlist", "msg", "dead"}, 1)
1277+
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "dead"}, 1, m.metricLabels)
12781278

12791279
// Update the state
12801280
state.Incarnation = d.Incarnation

0 commit comments

Comments
 (0)