Skip to content

Commit d5e5f70

Browse files
Add error rate metrics to v1 (#149)
* Add metrics for error rate * Counter server errors for error rate * Separate server and user errors * Not authorized errors are user errors * Catch invalid graphql requests * Catch invalid graphql requests
1 parent 059da6c commit d5e5f70

File tree

7 files changed

+315
-15
lines changed

7 files changed

+315
-15
lines changed

go.mod

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ require (
2222
github.com/onflow/flow-emulator v0.37.0
2323
github.com/onflow/flow-go-sdk v0.28.0
2424
github.com/pkg/errors v0.9.1
25+
github.com/prometheus/client_golang v1.12.1
2526
github.com/rs/cors v1.8.0
2627
github.com/sirupsen/logrus v1.8.1
2728
github.com/stretchr/testify v1.8.0
@@ -34,10 +35,12 @@ require (
3435

3536
require (
3637
github.com/agnivade/levenshtein v1.1.1 // indirect
38+
github.com/beorn7/perks v1.0.1 // indirect
3739
github.com/bits-and-blooms/bitset v1.3.0 // indirect
3840
github.com/btcsuite/btcd v0.22.1 // indirect
3941
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
4042
github.com/cespare/xxhash v1.1.0 // indirect
43+
github.com/cespare/xxhash/v2 v2.1.2 // indirect
4144
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
4245
github.com/davecgh/go-spew v1.1.1 // indirect
4346
github.com/dgraph-io/badger/v2 v2.2007.4 // indirect
@@ -81,6 +84,7 @@ require (
8184
github.com/mattn/go-colorable v0.1.12 // indirect
8285
github.com/mattn/go-isatty v0.0.16 // indirect
8386
github.com/mattn/go-sqlite3 v1.14.15 // indirect
87+
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
8488
github.com/minio/sha256-simd v1.0.0 // indirect
8589
github.com/mr-tron/base58 v1.2.0 // indirect
8690
github.com/multiformats/go-base32 v0.0.4 // indirect
@@ -99,6 +103,9 @@ require (
99103
github.com/pelletier/go-toml v1.9.5 // indirect
100104
github.com/pelletier/go-toml/v2 v2.0.2 // indirect
101105
github.com/pmezard/go-difflib v1.0.0 // indirect
106+
github.com/prometheus/client_model v0.2.0 // indirect
107+
github.com/prometheus/common v0.37.0 // indirect
108+
github.com/prometheus/procfs v0.8.0 // indirect
102109
github.com/psiemens/sconfig v0.1.0 // indirect
103110
github.com/rivo/uniseg v0.2.1-0.20211004051800-57c86be7915a // indirect
104111
github.com/rs/zerolog v1.27.0 // indirect

go.sum

Lines changed: 48 additions & 0 deletions
Large diffs are not rendered by default.

handler.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ package playground
2121
import (
2222
"context"
2323
"fmt"
24+
"github.com/dapperlabs/flow-playground-api/middleware/errors"
25+
"github.com/dapperlabs/flow-playground-api/telemetry"
2426
"github.com/getsentry/sentry-go"
2527
"net/http"
2628
"runtime/debug"
@@ -32,13 +34,15 @@ import (
3234
func GraphQLHandler(resolver *Resolver, middlewares ...graphql.ResponseMiddleware) http.HandlerFunc {
3335
srv := gqlHandler.NewDefaultServer(NewExecutableSchema(Config{Resolvers: resolver}))
3436

37+
srv.Use(telemetry.NewMetrics())
38+
3539
for _, middleware := range middlewares {
3640
srv.AroundResponses(middleware)
3741
}
3842

3943
srv.SetRecoverFunc(func(ctx context.Context, err interface{}) (userMessage error) {
4044
sentry.CaptureException(fmt.Errorf("panic: %v, stack: %s", err, string(debug.Stack())))
41-
return fmt.Errorf("panic: %s\n\n%s", err, string(debug.Stack()))
45+
return errors.ServerErr
4246
})
4347

4448
return srv.ServeHTTP

middleware/errors/errors.go

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,29 @@ package errors
2020

2121
import (
2222
"context"
23-
23+
"errors"
24+
"fmt"
2425
"github.com/99designs/gqlgen/graphql"
25-
"github.com/sirupsen/logrus"
26-
26+
"github.com/dapperlabs/flow-playground-api/telemetry"
2727
"github.com/getsentry/sentry-go"
28+
"github.com/sirupsen/logrus"
2829
)
2930

31+
// Sentinel errors whose messages are substituted into responses so that
// internal details are not exposed to clients.
var (
	// ServerErr carries the generic message used in place of an internal
	// (non-user) error.
	ServerErr = errors.New("something went wrong, we are looking into the issue")

	// GraphqlErr carries the message used for errors that arrive with a
	// "code" extension already set.
	GraphqlErr = errors.New("invalid graphql request")
)
33+
34+
// UserError is an error attributed to the client's request rather than to
// the server. It is a distinct type so callers can detect it with errors.As.
type UserError struct {
	msg string
}

// NewUserError returns a *UserError that carries msg.
func NewUserError(msg string) *UserError {
	return &UserError{msg: msg}
}

// Error implements the error interface. The stored message is prefixed with
// "user error: ".
func (e *UserError) Error() string {
	return fmt.Sprintf("user error: %s", e.msg)
}
45+
3046
type errCtxKeyType string
3147

3248
var (
@@ -48,20 +64,27 @@ func Middleware(entry *logrus.Entry, localHub *sentry.Hub) graphql.ResponseMiddl
4864
res := next(ctx)
4965
errList := graphql.GetErrors(ctx)
5066

51-
for _, err := range errList {
67+
for i, err := range errList {
5268
contextEntry := entry.
5369
WithFields(debugFields)
5470

55-
if cause := err.Extensions["cause"]; cause != nil {
56-
contextEntry.
57-
WithError(cause.(error)).
58-
Error("GQL Request Server Error")
59-
sentryCtx := context.WithValue(ctx, sentryLevelCtxKey, sentry.LevelError)
60-
localHub.RecoverWithContext(sentryCtx, err)
71+
if code := err.Extensions["code"]; code != nil {
72+
res.Errors[i].Message = GraphqlErr.Error()
6173
} else if err != nil {
62-
contextEntry.WithError(err).Warnf("GQL Request Client Error: %v err = %+v", err.Extensions["general_error"], err)
63-
sentryCtx := context.WithValue(ctx, sentryLevelCtxKey, sentry.LevelWarning)
64-
localHub.RecoverWithContext(sentryCtx, err)
74+
var userErr *UserError
75+
if errors.As(err, &userErr) {
76+
telemetry.UserErrorCounter.Inc()
77+
res.Extensions["code"] = "BAD_REQUEST"
78+
} else {
79+
localHub.CaptureException(err)
80+
telemetry.ServerErrorCounter.Inc()
81+
res.Errors[i].Message = ServerErr.Error()
82+
res.Extensions["code"] = "INTERNAL_SERVER_ERROR"
83+
}
84+
85+
contextEntry.
86+
WithError(err).
87+
Warnf("GQL Request Client Error: %v err = %+v", err.Extensions["general_error"], err)
6588
}
6689
}
6790

resolver.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/dapperlabs/flow-playground-api/auth"
2626
"github.com/dapperlabs/flow-playground-api/blockchain"
2727
"github.com/dapperlabs/flow-playground-api/controller"
28+
userErr "github.com/dapperlabs/flow-playground-api/middleware/errors"
2829
"github.com/dapperlabs/flow-playground-api/migrate"
2930
"github.com/dapperlabs/flow-playground-api/model"
3031
"github.com/dapperlabs/flow-playground-api/storage"
@@ -109,7 +110,7 @@ func (r *mutationResolver) authorize(ctx context.Context, ID uuid.UUID) error {
109110
}
110111

111112
if err := r.auth.CheckProjectAccess(ctx, proj); err != nil {
112-
return err
113+
return userErr.NewUserError("not authorized")
113114
}
114115

115116
return nil

server/server.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ package main
2020

2121
import (
2222
"fmt"
23+
"github.com/dapperlabs/flow-playground-api/telemetry"
24+
"github.com/prometheus/client_golang/prometheus/promhttp"
2325
"log"
2426
"net/http"
2527
"strings"
@@ -209,6 +211,9 @@ func main() {
209211

210212
router.HandleFunc("/ping", ping)
211213

214+
router.Handle("/metrics", promhttp.Handler())
215+
defer telemetry.UnRegisterMetrics()
216+
212217
logStartMessage(build.Version())
213218

214219
log.Printf("Connect to http://localhost:%d/ for GraphQL playground", conf.Port)

telemetry/metrics.go

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/*
2+
* Flow Playground
3+
*
4+
* Copyright 2019 Dapper Labs, Inc.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package telemetry
20+
21+
import (
22+
"context"
23+
"time"
24+
25+
"github.com/99designs/gqlgen/graphql"
26+
"github.com/prometheus/client_golang/prometheus"
27+
)
28+
29+
// existStatusFailure is the "exitStatus" label value recorded when a request
// or resolver finished with an error.
const existStatusFailure = "failure"

// exitStatusSuccess is the "exitStatus" label value recorded when a request
// or resolver finished without an error.
const exitStatusSuccess = "success"
33+
34+
var (
35+
registered bool
36+
requestStartedCounter prometheus.Counter
37+
requestCompletedCounter prometheus.Counter
38+
resolverStartedCounter *prometheus.CounterVec
39+
resolverCompletedCounter *prometheus.CounterVec
40+
timeToResolveField *prometheus.HistogramVec
41+
timeToHandleRequest *prometheus.HistogramVec
42+
ServerErrorCounter prometheus.Counter
43+
UserErrorCounter prometheus.Counter
44+
)
45+
46+
// RequestsMetrics is the stateless value whose methods record Prometheus
// metrics for GraphQL operations, responses and field resolvers.
type RequestsMetrics struct{}
49+
50+
var _ interface {
51+
graphql.HandlerExtension
52+
graphql.OperationInterceptor
53+
graphql.ResponseInterceptor
54+
graphql.FieldInterceptor
55+
} = RequestsMetrics{}
56+
57+
func NewMetrics() graphql.HandlerExtension {
58+
RegisterMetrics()
59+
return RequestsMetrics{}
60+
}
61+
62+
func RegisterMetrics() {
63+
if !registered {
64+
RegisterOn(prometheus.DefaultRegisterer)
65+
registered = true
66+
}
67+
}
68+
69+
func RegisterOn(registerer prometheus.Registerer) {
70+
requestStartedCounter = prometheus.NewCounter(
71+
prometheus.CounterOpts{
72+
Name: "graphql_request_started_total",
73+
Help: "Total number of requests started on the graphql server.",
74+
},
75+
)
76+
77+
requestCompletedCounter = prometheus.NewCounter(
78+
prometheus.CounterOpts{
79+
Name: "graphql_request_completed_total",
80+
Help: "Total number of requests completed on the graphql server.",
81+
},
82+
)
83+
84+
resolverStartedCounter = prometheus.NewCounterVec(
85+
prometheus.CounterOpts{
86+
Name: "graphql_resolver_started_total",
87+
Help: "Total number of resolver started on the graphql server.",
88+
},
89+
[]string{"object", "field"},
90+
)
91+
92+
resolverCompletedCounter = prometheus.NewCounterVec(
93+
prometheus.CounterOpts{
94+
Name: "graphql_resolver_completed_total",
95+
Help: "Total number of resolver completed on the graphql server.",
96+
},
97+
[]string{"object", "field"},
98+
)
99+
100+
timeToResolveField = prometheus.NewHistogramVec(prometheus.HistogramOpts{
101+
Name: "graphql_resolver_duration_ms",
102+
Help: "The time taken to resolve a field by graphql server.",
103+
Buckets: prometheus.ExponentialBuckets(1, 2, 11),
104+
}, []string{"exitStatus", "object", "field"})
105+
106+
timeToHandleRequest = prometheus.NewHistogramVec(prometheus.HistogramOpts{
107+
Name: "graphql_request_duration_ms",
108+
Help: "The time taken to handle a request by graphql server.",
109+
Buckets: prometheus.ExponentialBuckets(1, 2, 11),
110+
}, []string{"exitStatus"})
111+
112+
ServerErrorCounter = prometheus.NewCounter(
113+
prometheus.CounterOpts{
114+
Name: "server_error_total",
115+
Help: "Total number of internal server errors.",
116+
},
117+
)
118+
119+
UserErrorCounter = prometheus.NewCounter(
120+
prometheus.CounterOpts{
121+
Name: "user_error_total",
122+
Help: "Total number of bad requests from users.",
123+
},
124+
)
125+
126+
registerer.MustRegister(
127+
requestStartedCounter,
128+
requestCompletedCounter,
129+
resolverStartedCounter,
130+
resolverCompletedCounter,
131+
timeToResolveField,
132+
timeToHandleRequest,
133+
ServerErrorCounter,
134+
UserErrorCounter,
135+
)
136+
}
137+
138+
func UnRegisterMetrics() {
139+
if registered {
140+
UnRegisterFrom(prometheus.DefaultRegisterer)
141+
registered = false
142+
}
143+
}
144+
145+
func UnRegisterFrom(registerer prometheus.Registerer) {
146+
registerer.Unregister(requestStartedCounter)
147+
registerer.Unregister(requestCompletedCounter)
148+
registerer.Unregister(resolverStartedCounter)
149+
registerer.Unregister(resolverCompletedCounter)
150+
registerer.Unregister(timeToResolveField)
151+
registerer.Unregister(timeToHandleRequest)
152+
registerer.Unregister(ServerErrorCounter)
153+
registerer.Unregister(UserErrorCounter)
154+
}
155+
156+
func (a RequestsMetrics) ExtensionName() string {
157+
return "Prometheus"
158+
}
159+
160+
func (a RequestsMetrics) Validate(schema graphql.ExecutableSchema) error {
161+
return nil
162+
}
163+
164+
func (a RequestsMetrics) InterceptOperation(ctx context.Context, next graphql.OperationHandler) graphql.ResponseHandler {
165+
requestStartedCounter.Inc()
166+
return next(ctx)
167+
}
168+
169+
func (a RequestsMetrics) InterceptResponse(ctx context.Context, next graphql.ResponseHandler) *graphql.Response {
170+
errList := graphql.GetErrors(ctx)
171+
172+
var exitStatus string
173+
if len(errList) > 0 {
174+
exitStatus = existStatusFailure
175+
} else {
176+
exitStatus = exitStatusSuccess
177+
}
178+
179+
oc := graphql.GetOperationContext(ctx)
180+
observerStart := oc.Stats.OperationStart
181+
182+
timeToHandleRequest.With(prometheus.Labels{"exitStatus": exitStatus}).
183+
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
184+
185+
requestCompletedCounter.Inc()
186+
187+
return next(ctx)
188+
}
189+
190+
func (a RequestsMetrics) InterceptField(ctx context.Context, next graphql.Resolver) (interface{}, error) {
191+
fc := graphql.GetFieldContext(ctx)
192+
193+
resolverStartedCounter.WithLabelValues(fc.Object, fc.Field.Name).Inc()
194+
195+
observerStart := time.Now()
196+
197+
res, err := next(ctx)
198+
199+
var exitStatus string
200+
if err != nil {
201+
exitStatus = existStatusFailure
202+
} else {
203+
exitStatus = exitStatusSuccess
204+
}
205+
206+
timeToResolveField.WithLabelValues(exitStatus, fc.Object, fc.Field.Name).
207+
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
208+
209+
resolverCompletedCounter.WithLabelValues(fc.Object, fc.Field.Name).Inc()
210+
211+
return res, err
212+
}

0 commit comments

Comments
 (0)