@@ -6,6 +6,12 @@ import (
6
6
"fmt"
7
7
"time"
8
8
9
+ "go.opentelemetry.io/otel/attribute"
10
+ "go.opentelemetry.io/otel/metric/global"
11
+ "go.opentelemetry.io/otel/metric/instrument"
12
+ "go.opentelemetry.io/otel/metric/instrument/syncint64"
13
+ "go.opentelemetry.io/otel/metric/unit"
14
+
9
15
"github.com/celestiaorg/nmt/namespace"
10
16
"github.com/celestiaorg/rsmt2d"
11
17
@@ -23,8 +29,61 @@ const (
23
29
// serve getEDS request for block size 256
24
30
defaultMinRequestTimeout = time .Minute // should be >= shrexeds server write timeout
25
31
defaultMinAttemptsCount = 3
32
+ metricObservationTimeout = 100 * time .Millisecond
26
33
)
27
34
35
+ var meter = global .MeterProvider ().Meter ("shrex/getter" )
36
+
37
+ type metrics struct {
38
+ edsAttempts syncint64.Histogram
39
+ ndAttempts syncint64.Histogram
40
+ }
41
+
42
+ func (m * metrics ) recordEDSAttempt (attemptCount int , success bool ) {
43
+ if m == nil {
44
+ return
45
+ }
46
+ ctx , cancel := context .WithTimeout (context .Background (), metricObservationTimeout )
47
+ defer cancel ()
48
+ m .edsAttempts .Record (ctx , int64 (attemptCount ), attribute .Bool ("success" , success ))
49
+ }
50
+
51
+ func (m * metrics ) recordNDAttempt (attemptCount int , success bool ) {
52
+ if m == nil {
53
+ return
54
+ }
55
+
56
+ ctx , cancel := context .WithTimeout (context .Background (), metricObservationTimeout )
57
+ defer cancel ()
58
+ m .ndAttempts .Record (ctx , int64 (attemptCount ), attribute .Bool ("success" , success ))
59
+ }
60
+
61
+ func (sg * ShrexGetter ) WithMetrics () error {
62
+ edsAttemptHistogram , err := meter .SyncInt64 ().Histogram (
63
+ "getters_shrex_eds_attempts_per_request" ,
64
+ instrument .WithUnit (unit .Dimensionless ),
65
+ instrument .WithDescription ("Number of attempts per shrex/eds request" ),
66
+ )
67
+ if err != nil {
68
+ return err
69
+ }
70
+
71
+ ndAttemptHistogram , err := meter .SyncInt64 ().Histogram (
72
+ "getters_shrex_nd_attempts_per_request" ,
73
+ instrument .WithUnit (unit .Dimensionless ),
74
+ instrument .WithDescription ("Number of attempts per shrex/nd request" ),
75
+ )
76
+ if err != nil {
77
+ return err
78
+ }
79
+
80
+ sg .metrics = & metrics {
81
+ edsAttempts : edsAttemptHistogram ,
82
+ ndAttempts : ndAttemptHistogram ,
83
+ }
84
+ return nil
85
+ }
86
+
28
87
// ShrexGetter is a share.Getter that uses the shrex/eds and shrex/nd protocols to retrieve shares.
29
88
type ShrexGetter struct {
30
89
edsClient * shrexeds.Client
@@ -37,6 +96,8 @@ type ShrexGetter struct {
37
96
// minAttemptsCount is used to split the request timeout into multiple attempts, making it possible to
38
97
// attempt multiple peers in scope of one request before context timeout is reached
39
98
minAttemptsCount int
99
+
100
+ metrics * metrics
40
101
}
41
102
42
103
func NewShrexGetter (edsClient * shrexeds.Client , ndClient * shrexnd.Client , peerManager * peers.Manager ) * ShrexGetter {
@@ -79,6 +140,7 @@ func (sg *ShrexGetter) GetEDS(ctx context.Context, root *share.Root) (*rsmt2d.Ex
79
140
"hash" , root .String (),
80
141
"err" , getErr ,
81
142
"finished (s)" , time .Since (start ))
143
+ sg .metrics .recordEDSAttempt (attempt , false )
82
144
return nil , fmt .Errorf ("getter/shrex: %w" , err )
83
145
}
84
146
@@ -89,6 +151,7 @@ func (sg *ShrexGetter) GetEDS(ctx context.Context, root *share.Root) (*rsmt2d.Ex
89
151
switch {
90
152
case getErr == nil :
91
153
setStatus (peers .ResultSynced )
154
+ sg .metrics .recordEDSAttempt (attempt , true )
92
155
return eds , nil
93
156
case errors .Is (getErr , context .DeadlineExceeded ),
94
157
errors .Is (getErr , context .Canceled ):
@@ -135,6 +198,7 @@ func (sg *ShrexGetter) GetSharesByNamespace(
135
198
"hash" , root .String (),
136
199
"err" , getErr ,
137
200
"finished (s)" , time .Since (start ))
201
+ sg .metrics .recordNDAttempt (attempt , false )
138
202
return nil , fmt .Errorf ("getter/shrex: %w" , err )
139
203
}
140
204
@@ -145,6 +209,7 @@ func (sg *ShrexGetter) GetSharesByNamespace(
145
209
switch {
146
210
case getErr == nil :
147
211
setStatus (peers .ResultNoop )
212
+ sg .metrics .recordNDAttempt (attempt , true )
148
213
return nd , nil
149
214
case errors .Is (getErr , context .DeadlineExceeded ),
150
215
errors .Is (getErr , context .Canceled ):
0 commit comments