forked from caddyserver/caddy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetrics.go
More file actions
330 lines (291 loc) · 10.6 KB
/
metrics.go
File metadata and controls
330 lines (291 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
package caddyhttp
import (
"context"
"errors"
"net/http"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/internal/metrics"
)
// Metrics configures metrics observations.
// EXPERIMENTAL and subject to change or removal.
//
// Example configuration:
//
// {
// "apps": {
// "http": {
// "metrics": {
// "per_host": true,
// "observe_catchall_hosts": false
// },
// "servers": {
// "srv0": {
// "routes": [{
// "match": [{"host": ["example.com", "www.example.com"]}],
// "handle": [{"handler": "static_response", "body": "Hello"}]
// }]
// }
// }
// }
// }
// }
//
// In this configuration:
// - Requests to example.com and www.example.com get individual host labels
// - All other hosts (e.g., attacker.com) are aggregated under "_other" label
// - This prevents unlimited cardinality from arbitrary Host headers
type Metrics struct {
// Enable per-host metrics. Enabling this option may
// incur high-memory consumption, depending on the number of hosts
// managed by Caddy.
//
// CARDINALITY PROTECTION: To prevent unbounded cardinality attacks,
// only explicitly configured hosts (via host matchers) are allowed
// by default. Other hosts are aggregated under the "_other" label.
// See AllowCatchAllHosts to change this behavior.
PerHost bool `json:"per_host,omitempty"`
// Allow metrics for catch-all hosts (hosts without explicit configuration).
// When false (default), only hosts explicitly configured via host matchers
// will get individual metrics labels. All other hosts will be aggregated
// under the "_other" label to prevent cardinality explosion.
//
// This is automatically enabled for HTTPS servers (since certificates provide
// some protection against unbounded cardinality), but disabled for HTTP servers
// by default to prevent cardinality attacks from arbitrary Host headers.
//
// Set to true to allow all hosts to get individual metrics (NOT RECOMMENDED
// for production environments exposed to the internet).
ObserveCatchallHosts bool `json:"observe_catchall_hosts,omitempty"`
init sync.Once
httpMetrics *httpMetrics
allowedHosts map[string]struct{}
hasHTTPSServer bool
}
type httpMetrics struct {
requestInFlight *prometheus.GaugeVec
requestCount *prometheus.CounterVec
requestErrors *prometheus.CounterVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec
responseSize *prometheus.HistogramVec
responseDuration *prometheus.HistogramVec
}
func initHTTPMetrics(ctx caddy.Context, metrics *Metrics) {
const ns, sub = "caddy", "http"
registry := ctx.GetMetricsRegistry()
basicLabels := []string{"server", "handler"}
if metrics.PerHost {
basicLabels = append(basicLabels, "host")
}
metrics.httpMetrics.requestInFlight = promauto.With(registry).NewGaugeVec(prometheus.GaugeOpts{
Namespace: ns,
Subsystem: sub,
Name: "requests_in_flight",
Help: "Number of requests currently handled by this server.",
}, basicLabels)
metrics.httpMetrics.requestErrors = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_errors_total",
Help: "Number of requests resulting in middleware errors.",
}, basicLabels)
metrics.httpMetrics.requestCount = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "requests_total",
Help: "Counter of HTTP(S) requests made.",
}, basicLabels)
// TODO: allow these to be customized in the config
durationBuckets := prometheus.DefBuckets
sizeBuckets := prometheus.ExponentialBuckets(256, 4, 8)
httpLabels := []string{"server", "handler", "code", "method"}
if metrics.PerHost {
httpLabels = append(httpLabels, "host")
}
metrics.httpMetrics.requestDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_duration_seconds",
Help: "Histogram of round-trip request durations.",
Buckets: durationBuckets,
}, httpLabels)
metrics.httpMetrics.requestSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_size_bytes",
Help: "Total size of the request. Includes body",
Buckets: sizeBuckets,
}, httpLabels)
metrics.httpMetrics.responseSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "response_size_bytes",
Help: "Size of the returned response.",
Buckets: sizeBuckets,
}, httpLabels)
metrics.httpMetrics.responseDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "response_duration_seconds",
Help: "Histogram of times to first byte in response bodies.",
Buckets: durationBuckets,
}, httpLabels)
}
// scanConfigForHosts scans the HTTP app configuration to build a set of allowed hosts
// for metrics collection, similar to how auto-HTTPS scans for domain names.
func (m *Metrics) scanConfigForHosts(app *App) {
if !m.PerHost {
return
}
m.allowedHosts = make(map[string]struct{})
m.hasHTTPSServer = false
for _, srv := range app.Servers {
// Check if this server has TLS enabled
serverHasTLS := len(srv.TLSConnPolicies) > 0
if serverHasTLS {
m.hasHTTPSServer = true
}
// Collect hosts from route matchers
for _, route := range srv.Routes {
for _, matcherSet := range route.MatcherSets {
for _, matcher := range matcherSet {
if hm, ok := matcher.(*MatchHost); ok {
for _, host := range *hm {
// Only allow non-fuzzy hosts to prevent unbounded cardinality
if !hm.fuzzy(host) {
m.allowedHosts[strings.ToLower(host)] = struct{}{}
}
}
}
}
}
}
}
}
// shouldAllowHostMetrics determines if metrics should be collected for the given host.
// This implements the cardinality protection by only allowing metrics for:
// 1. Explicitly configured hosts
// 2. Catch-all requests on HTTPS servers (if AllowCatchAllHosts is true or auto-enabled)
// 3. Catch-all requests on HTTP servers only if explicitly allowed
func (m *Metrics) shouldAllowHostMetrics(host string, isHTTPS bool) bool {
if !m.PerHost {
return true // host won't be used in labels anyway
}
normalizedHost := strings.ToLower(host)
// Always allow explicitly configured hosts
if _, exists := m.allowedHosts[normalizedHost]; exists {
return true
}
// For catch-all requests (not in allowed hosts)
allowCatchAll := m.ObserveCatchallHosts || (isHTTPS && m.hasHTTPSServer)
return allowCatchAll
}
// serverNameFromContext extracts the current server name from the context.
// Returns "UNKNOWN" if none is available (should probably never happen).
func serverNameFromContext(ctx context.Context) string {
srv, ok := ctx.Value(ServerCtxKey).(*Server)
if !ok || srv == nil || srv.name == "" {
return "UNKNOWN"
}
return srv.name
}
// metricsInstrumentedRoute wraps a compiled route Handler with metrics
// instrumentation. It wraps the entire compiled route chain once,
// collecting metrics only once per route match.
type metricsInstrumentedRoute struct {
handler string
next Handler
metrics *Metrics
}
func newMetricsInstrumentedRoute(ctx caddy.Context, handler string, next Handler, m *Metrics) *metricsInstrumentedRoute {
m.init.Do(func() {
initHTTPMetrics(ctx, m)
})
return &metricsInstrumentedRoute{handler: handler, next: next, metrics: m}
}
func (h *metricsInstrumentedRoute) ServeHTTP(w http.ResponseWriter, r *http.Request) error {
server := serverNameFromContext(r.Context())
labels := prometheus.Labels{"server": server, "handler": h.handler}
method := metrics.SanitizeMethod(r.Method)
// the "code" value is set later, but initialized here to eliminate the possibility
// of a panic
statusLabels := prometheus.Labels{"server": server, "handler": h.handler, "method": method, "code": ""}
// Determine if this is an HTTPS request
isHTTPS := r.TLS != nil
if h.metrics.PerHost {
// Apply cardinality protection for host metrics
if h.metrics.shouldAllowHostMetrics(r.Host, isHTTPS) {
labels["host"] = strings.ToLower(r.Host)
statusLabels["host"] = strings.ToLower(r.Host)
} else {
// Use a catch-all label for unallowed hosts to prevent cardinality explosion
labels["host"] = "_other"
statusLabels["host"] = "_other"
}
}
inFlight := h.metrics.httpMetrics.requestInFlight.With(labels)
inFlight.Inc()
defer inFlight.Dec()
start := time.Now()
// This is a _bit_ of a hack - it depends on the ShouldBufferFunc always
// being called when the headers are written.
// Effectively the same behaviour as promhttp.InstrumentHandlerTimeToWriteHeader.
writeHeaderRecorder := ShouldBufferFunc(func(status int, header http.Header) bool {
statusLabels["code"] = metrics.SanitizeCode(status)
ttfb := time.Since(start).Seconds()
h.metrics.httpMetrics.responseDuration.With(statusLabels).Observe(ttfb)
return false
})
wrec := NewResponseRecorder(w, nil, writeHeaderRecorder)
err := h.next.ServeHTTP(wrec, r)
dur := time.Since(start).Seconds()
h.metrics.httpMetrics.requestCount.With(labels).Inc()
observeRequest := func(status int) {
// If the code hasn't been set yet, and we didn't encounter an error, we're
// probably falling through with an empty handler.
if statusLabels["code"] == "" {
// we still sanitize it, even though it's likely to be 0. A 200 is
// returned on fallthrough so we want to reflect that.
statusLabels["code"] = metrics.SanitizeCode(status)
}
h.metrics.httpMetrics.requestDuration.With(statusLabels).Observe(dur)
h.metrics.httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r)))
h.metrics.httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size()))
}
if err != nil {
var handlerErr HandlerError
if errors.As(err, &handlerErr) {
observeRequest(handlerErr.StatusCode)
}
h.metrics.httpMetrics.requestErrors.With(labels).Inc()
return err
}
observeRequest(wrec.Status())
return nil
}
// taken from https://github.com/prometheus/client_golang/blob/6007b2b5cae01203111de55f753e76d8dac1f529/prometheus/promhttp/instrument_server.go#L298
func computeApproximateRequestSize(r *http.Request) int {
s := 0
if r.URL != nil {
s += len(r.URL.String())
}
s += len(r.Method)
s += len(r.Proto)
for name, values := range r.Header {
s += len(name)
for _, value := range values {
s += len(value)
}
}
s += len(r.Host)
// N.B. r.Form and r.MultipartForm are assumed to be included in r.URL.
if r.ContentLength != -1 {
s += int(r.ContentLength)
}
return s
}