Skip to content

Commit ebb0764

Browse files
authored
feat: expose metrics from the kubenurse httpclient (#31)
The following new metrics were added:
* kubenurse_httpclient_requests_total - Total number of requests issued by the kubenurse httpclient, partitioned by HTTP code and method.
* kubenurse_httpclient_trace_request_duration_seconds - Latency histogram for requests from the kubenurse httpclient, partitioned by trace event.
* kubenurse_httpclient_request_duration_seconds - Histogram of request latencies from the kubenurse httpclient.
1 parent 9f02d56 commit ebb0764

File tree

6 files changed

+154
-87
lines changed

6 files changed

+154
-87
lines changed

internal/kubenurse/server.go

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package kubenurse
44
import (
55
"context"
66
"fmt"
7-
"log"
87
"net/http"
98
"os"
109
"sync"
@@ -29,8 +28,6 @@ type Server struct {
2928
useTLS bool
3029
// If we want to consider kubenurses on unschedulable nodes
3130
allowUnschedulable bool
32-
extraCA string
33-
insecure bool
3431

3532
// Mutex to protect ready flag
3633
mu *sync.Mutex
@@ -39,15 +36,13 @@ type Server struct {
3936

4037
// New creates a new kubenurse server. The server can be configured with the following environment variables:
4138
// * KUBENURSE_USE_TLS
42-
// * KUBENURSE_ALLOW_UNSCHEDULABL
39+
// * KUBENURSE_ALLOW_UNSCHEDULABLE
4340
// * KUBENURSE_INGRESS_URL
4441
// * KUBENURSE_SERVICE_URL
4542
// * KUBERNETES_SERVICE_HOST
4643
// * KUBERNETES_SERVICE_PORT
4744
// * KUBENURSE_NAMESPACE
4845
// * KUBENURSE_NEIGHBOUR_FILTER
49-
// * KUBENURSE_EXTRA_CA
50-
// * KUBENURSE_INSECURE
5146
func New(ctx context.Context, k8s kubernetes.Interface) (*Server, error) {
5247
mux := http.NewServeMux()
5348

@@ -64,8 +59,6 @@ func New(ctx context.Context, k8s kubernetes.Interface) (*Server, error) {
6459
//nolint:goconst // No need to make "true" a constant in my opinion, readability is better like this.
6560
useTLS: os.Getenv("KUBENURSE_USE_TLS") == "true",
6661
allowUnschedulable: os.Getenv("KUBENURSE_ALLOW_UNSCHEDULABLE") == "true",
67-
extraCA: os.Getenv("KUBENURSE_EXTRA_CA"),
68-
insecure: os.Getenv("KUBENURSE_INSECURE") == "true",
6962

7063
mu: new(sync.Mutex),
7164
ready: true,
@@ -77,26 +70,13 @@ func New(ctx context.Context, k8s kubernetes.Interface) (*Server, error) {
7770
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
7871
)
7972

80-
// setup http transport
81-
transport, err := server.generateRoundTripper()
82-
if err != nil {
83-
log.Printf("using default transport: %s", err)
84-
85-
transport = http.DefaultTransport
86-
}
87-
88-
httpClient := &http.Client{
89-
Timeout: 5 * time.Second,
90-
Transport: transport,
91-
}
92-
9373
discovery, err := kubediscovery.New(ctx, k8s, server.allowUnschedulable)
9474
if err != nil {
9575
return nil, fmt.Errorf("create k8s discovery client: %w", err)
9676
}
9777

9878
// setup checker
99-
chk, err := servicecheck.New(ctx, httpClient, discovery, promRegistry, server.allowUnschedulable, 3*time.Second)
79+
chk, err := servicecheck.New(ctx, discovery, promRegistry, server.allowUnschedulable, 3*time.Second)
10080
if err != nil {
10181
return nil, err
10282
}
@@ -134,7 +114,6 @@ func (s *Server) Run() error {
134114
defer wg.Done()
135115

136116
s.checker.RunScheduled(5 * time.Second)
137-
log.Printf("checker exited")
138117
}()
139118

140119
wg.Add(1)

internal/kubenurse/transport.go

Lines changed: 0 additions & 55 deletions
This file was deleted.

internal/servicecheck/httptrace.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package servicecheck
2+
3+
import (
4+
"net/http"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
"github.com/prometheus/client_golang/prometheus/promhttp"
8+
)
9+
10+
func withRequestTracing(registry *prometheus.Registry, transport http.RoundTripper) http.RoundTripper {
11+
counter := prometheus.NewCounterVec(
12+
prometheus.CounterOpts{
13+
Namespace: metricsNamespace,
14+
Name: "httpclient_requests_total",
15+
Help: "A counter for requests from the kubenurse http client.",
16+
},
17+
[]string{"code", "method"},
18+
)
19+
20+
latencyVec := prometheus.NewHistogramVec(
21+
prometheus.HistogramOpts{
22+
Namespace: metricsNamespace,
23+
Name: "httpclient_trace_request_duration_seconds",
24+
Help: "Latency histogram for requests from the kubenurse http client. Time in seconds since the start of the http request.",
25+
Buckets: []float64{.0005, .005, .01, .025, .05, .1, .25, .5, 1},
26+
},
27+
[]string{"event"},
28+
)
29+
30+
// histVec has no labels, making it a zero-dimensional ObserverVec.
31+
histVec := prometheus.NewHistogramVec(
32+
prometheus.HistogramOpts{
33+
Namespace: metricsNamespace,
34+
Name: "httpclient_request_duration_seconds",
35+
Help: "A latency histogram of request latencies from the kubenurse http client.",
36+
Buckets: prometheus.DefBuckets,
37+
},
38+
[]string{},
39+
)
40+
41+
// Register all of the metrics in the standard registry.
42+
registry.MustRegister(counter, latencyVec, histVec)
43+
44+
// Define functions for the available httptrace.ClientTrace hook
45+
// functions that we want to instrument.
46+
trace := &promhttp.InstrumentTrace{
47+
DNSStart: func(t float64) {
48+
latencyVec.WithLabelValues("dns_start").Observe(t)
49+
},
50+
DNSDone: func(t float64) {
51+
latencyVec.WithLabelValues("dns_done").Observe(t)
52+
},
53+
ConnectStart: func(t float64) {
54+
latencyVec.WithLabelValues("connect_start").Observe(t)
55+
},
56+
ConnectDone: func(t float64) {
57+
latencyVec.WithLabelValues("connect_done").Observe(t)
58+
},
59+
TLSHandshakeStart: func(t float64) {
60+
latencyVec.WithLabelValues("tls_handshake_start").Observe(t)
61+
},
62+
TLSHandshakeDone: func(t float64) {
63+
latencyVec.WithLabelValues("tls_handshake_done").Observe(t)
64+
},
65+
WroteRequest: func(t float64) {
66+
latencyVec.WithLabelValues("wrote_request").Observe(t)
67+
},
68+
GotFirstResponseByte: func(t float64) {
69+
latencyVec.WithLabelValues("got_first_resp_byte").Observe(t)
70+
},
71+
}
72+
73+
// Wrap the default RoundTripper with middleware.
74+
roundTripper := promhttp.InstrumentRoundTripperCounter(counter,
75+
promhttp.InstrumentRoundTripperTrace(trace,
76+
promhttp.InstrumentRoundTripperDuration(histVec,
77+
transport,
78+
),
79+
),
80+
)
81+
82+
return roundTripper
83+
}

internal/servicecheck/servicecheck.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,32 +6,36 @@ import (
66
"fmt"
77
"log"
88
"net/http"
9+
"os"
910
"time"
1011

1112
"github.com/postfinance/kubenurse/internal/kubediscovery"
1213
"github.com/prometheus/client_golang/prometheus"
1314
)
1415

1516
const (
16-
okStr = "ok"
17-
errStr = "error"
17+
okStr = "ok"
18+
errStr = "error"
19+
metricsNamespace = "kubenurse"
1820
)
1921

2022
// New configures the checker with an instrumented HTTP client and a cache timeout for check
2123
// results. Other parameters of the Checker struct need to be configured separately.
22-
func New(ctx context.Context, httpClient *http.Client, discovery *kubediscovery.Client,
23-
promRegistry *prometheus.Registry, allowUnschedulable bool, cacheTTL time.Duration) (*Checker, error) {
24+
func New(ctx context.Context, discovery *kubediscovery.Client, promRegistry *prometheus.Registry,
25+
allowUnschedulable bool, cacheTTL time.Duration) (*Checker, error) {
2426
errorCounter := prometheus.NewCounterVec(
2527
prometheus.CounterOpts{
26-
Name: "kubenurse_errors_total",
27-
Help: "Kubenurse error counter partitioned by error type",
28+
Namespace: metricsNamespace,
29+
Name: "errors_total",
30+
Help: "Kubenurse error counter partitioned by error type",
2831
},
2932
[]string{"type"},
3033
)
3134

3235
durationSummary := prometheus.NewSummaryVec(
3336
prometheus.SummaryOpts{
34-
Name: "kubenurse_request_duration",
37+
Namespace: metricsNamespace,
38+
Name: "request_duration",
3539
Help: "Kubenurse request duration partitioned by error type",
3640
MaxAge: 1 * time.Minute,
3741
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
@@ -41,6 +45,19 @@ func New(ctx context.Context, httpClient *http.Client, discovery *kubediscovery.
4145

4246
promRegistry.MustRegister(errorCounter, durationSummary)
4347

48+
// setup http transport
49+
transport, err := generateRoundTripper(os.Getenv("KUBENURSE_EXTRA_CA"), os.Getenv("KUBENURSE_INSECURE") == "true")
50+
if err != nil {
51+
log.Printf("using default transport: %s", err)
52+
53+
transport = http.DefaultTransport
54+
}
55+
56+
httpClient := &http.Client{
57+
Timeout: 5 * time.Second,
58+
Transport: withRequestTracing(promRegistry, transport),
59+
}
60+
4461
return &Checker{
4562
allowUnschedulable: allowUnschedulable,
4663
discovery: discovery,

internal/servicecheck/servicecheck_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package servicecheck
22

33
import (
44
"context"
5-
"net/http"
65
"testing"
76
"time"
87

@@ -41,7 +40,7 @@ func TestCombined(t *testing.T) {
4140
discovery, err := kubediscovery.New(context.Background(), fakeClient, false)
4241
r.NoError(err)
4342

44-
checker, err := New(context.Background(), http.DefaultClient, discovery, prometheus.NewRegistry(), false, 3*time.Second)
43+
checker, err := New(context.Background(), discovery, prometheus.NewRegistry(), false, 3*time.Second)
4544
r.NoError(err)
4645
r.NotNil(checker)
4746

internal/servicecheck/transport.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package servicecheck
22

33
import (
4+
"crypto/tls"
5+
"crypto/x509"
46
"errors"
57
"fmt"
68
"net/http"
@@ -11,6 +13,7 @@ import (
1113
const (
1214
//nolint:gosec // This is the well-known path to Kubernetes serviceaccount tokens.
1315
tokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
16+
caFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
1417
)
1518

1619
// doRequest does an http request only to get the http status code
@@ -42,3 +45,44 @@ func (c *Checker) doRequest(url string) (string, error) {
4245

4346
return resp.Status, errors.New(resp.Status)
4447
}
48+
49+
// generateRoundTripper returns a custom http.RoundTripper, including the k8s CA.
50+
func generateRoundTripper(extraCA string, insecure bool) (http.RoundTripper, error) {
51+
// Append default certpool
52+
rootCAs, _ := x509.SystemCertPool()
53+
if rootCAs == nil {
54+
rootCAs = x509.NewCertPool()
55+
}
56+
57+
// Append ServiceAccount cacert
58+
caCert, err := os.ReadFile(caFile)
59+
if err != nil {
60+
return nil, fmt.Errorf("could not load certificate %s: %w", caFile, err)
61+
}
62+
63+
if ok := rootCAs.AppendCertsFromPEM(caCert); !ok {
64+
return nil, errors.New("could not append ca cert to system certpool")
65+
}
66+
67+
// Append extra CA, if set
68+
if extraCA != "" {
69+
caCert, err := os.ReadFile(extraCA) //nolint:gosec // Intentionally included by the user.
70+
if err != nil {
71+
return nil, fmt.Errorf("could not load certificate %s: %w", extraCA, err)
72+
}
73+
74+
if ok := rootCAs.AppendCertsFromPEM(caCert); !ok {
75+
return nil, errors.New("could not append extra ca cert to system certpool")
76+
}
77+
}
78+
79+
// Configure transport
80+
tlsConfig := &tls.Config{
81+
InsecureSkipVerify: insecure, //nolint:gosec // Can be true if the user requested this.
82+
RootCAs: rootCAs,
83+
}
84+
85+
transport := &http.Transport{TLSClientConfig: tlsConfig}
86+
87+
return transport, nil
88+
}

0 commit comments

Comments
 (0)