Skip to content

Commit 362c3c7

Browse files
Add timeout for /health endpoint checks (#145)
* Add timeout for /health endpoint checks * Switch back * Tests * Use timeout * Use timeout server * Descriptive test case naming * Error check
1 parent c9c1a75 commit 362c3c7

File tree

7 files changed

+67
-9
lines changed

7 files changed

+67
-9
lines changed

cmd/heartbeat/health/checker.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,23 @@ import (
88
type Checker struct {
99
pp *PortProbe
1010
k8s *KubernetesClient
11+
ec *EndpointClient
1112
}
1213

1314
// NewChecker creates a new Checker.
14-
func NewChecker(pp *PortProbe) *Checker {
15+
func NewChecker(pp *PortProbe, ec *EndpointClient) *Checker {
1516
return &Checker{
1617
pp: pp,
18+
ec: ec,
1719
}
1820
}
1921

2022
// NewCheckerK8S creates a new Checker for Kubernetes deployments.
21-
func NewCheckerK8S(pp *PortProbe, k8s *KubernetesClient) *Checker {
23+
func NewCheckerK8S(pp *PortProbe, k8s *KubernetesClient, ec *EndpointClient) *Checker {
2224
return &Checker{
2325
pp: pp,
2426
k8s: k8s,
27+
ec: ec,
2528
}
2629
}
2730

@@ -38,7 +41,7 @@ func (hc *Checker) GetHealth(ctx context.Context) float64 {
3841
// Some experiments might not support a /health endpoint, so
3942
// the result is only taken into account if the request error
4043
// is nil.
41-
status, err := checkHealthEndpoint()
44+
status, err := hc.ec.checkHealthEndpoint()
4245
if err == nil && !status {
4346
return 0
4447
}

cmd/heartbeat/health/checker_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ func TestChecker_getHealth(t *testing.T) {
2323
&KubernetesClient{
2424
clientset: healthyClientset,
2525
},
26+
&EndpointClient{},
2627
),
2728
endpointStatus: 200,
2829
want: 1,
@@ -31,6 +32,7 @@ func TestChecker_getHealth(t *testing.T) {
3132
name: "health-1-k8s-nil",
3233
checker: NewChecker(
3334
&PortProbe{},
35+
&EndpointClient{},
3436
),
3537
endpointStatus: 200,
3638
want: 1,
@@ -44,6 +46,7 @@ func TestChecker_getHealth(t *testing.T) {
4446
&KubernetesClient{
4547
clientset: healthyClientset,
4648
},
49+
&EndpointClient{},
4750
),
4851
endpointStatus: 200,
4952
want: 0,
@@ -55,6 +58,7 @@ func TestChecker_getHealth(t *testing.T) {
5558
&KubernetesClient{
5659
clientset: fake.NewSimpleClientset(),
5760
},
61+
&EndpointClient{},
5862
),
5963
endpointStatus: 200,
6064
want: 1,
@@ -79,6 +83,7 @@ func TestChecker_getHealth(t *testing.T) {
7983
},
8084
),
8185
},
86+
&EndpointClient{},
8287
),
8388
endpointStatus: 200,
8489
want: 0,
@@ -90,6 +95,7 @@ func TestChecker_getHealth(t *testing.T) {
9095
&KubernetesClient{
9196
clientset: healthyClientset,
9297
},
98+
&EndpointClient{},
9399
),
94100
endpointStatus: 500,
95101
want: 0,
@@ -116,6 +122,7 @@ func TestChecker_getHealth(t *testing.T) {
116122
},
117123
),
118124
},
125+
&EndpointClient{},
119126
),
120127
want: 0,
121128
},
@@ -125,6 +132,7 @@ func TestChecker_getHealth(t *testing.T) {
125132
&PortProbe{
126133
ports: map[string]bool{"65536": true},
127134
},
135+
&EndpointClient{},
128136
),
129137
want: 0,
130138
},

cmd/heartbeat/health/endpoint-check.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package health
22

33
import (
44
"net/http"
5+
"time"
56

67
"github.com/m-lab/locate/metrics"
78
)
@@ -10,11 +11,26 @@ var (
1011
healthAddress = "http://localhost:8000/health"
1112
)
1213

14+
// EndpointClient is an http client to check the local /health endpoint.
15+
type EndpointClient struct {
16+
http.Client
17+
}
18+
19+
// NewEndpointClient returns a new *EndpointClient with the specified request
20+
// timeout.
21+
func NewEndpointClient(timeout time.Duration) *EndpointClient {
22+
return &EndpointClient{
23+
http.Client{
24+
Timeout: timeout,
25+
},
26+
}
27+
}
28+
1329
// checkHealthEndpoint makes a call to the the local /health endpoint.
1430
// It returns an error if the HTTP request was not successful.
1531
// It returns true only if the returned HTTP status code equals 200 (OK).
16-
func checkHealthEndpoint() (bool, error) {
17-
resp, err := http.Get(healthAddress)
32+
func (ec *EndpointClient) checkHealthEndpoint() (bool, error) {
33+
resp, err := ec.Get(healthAddress)
1834
if err != nil {
1935
metrics.HealthEndpointChecksTotal.WithLabelValues("HTTP request error").Inc()
2036
return false, err

cmd/heartbeat/health/endpoint-check_test.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package health
33
import (
44
"net/http"
55
"testing"
6+
"time"
67

78
"github.com/m-lab/locate/cmd/heartbeat/health/healthtest"
89
)
@@ -27,7 +28,7 @@ func Test_checkHealthEndpoint(t *testing.T) {
2728
wantErr: false,
2829
},
2930
{
30-
name: "error",
31+
name: "error-server-not-running",
3132
want: false,
3233
wantErr: true,
3334
},
@@ -40,7 +41,8 @@ func Test_checkHealthEndpoint(t *testing.T) {
4041
defer srv.Close()
4142
}
4243

43-
got, err := checkHealthEndpoint()
44+
hc := NewEndpointClient(time.Second)
45+
got, err := hc.checkHealthEndpoint()
4446
if (err != nil) != tt.wantErr {
4547
t.Errorf("checkHealthEndpoint() error = %v, wantErr %v", err, tt.wantErr)
4648
return
@@ -52,3 +54,20 @@ func Test_checkHealthEndpoint(t *testing.T) {
5254
})
5355
}
5456
}
57+
58+
func Test_checkHealthEndpoint_timeout(t *testing.T) {
59+
srv := healthtest.TestTimeoutServer(2 * time.Second)
60+
healthAddress = srv.URL + "/health"
61+
defer srv.Close()
62+
63+
hc := NewEndpointClient(time.Second)
64+
got, err := hc.checkHealthEndpoint()
65+
if err == nil {
66+
t.Errorf("checkHealthEndpoint() error = %v, wantErr %s", err, "context deadline error")
67+
return
68+
}
69+
70+
if got != false {
71+
t.Errorf("checkHealthEndpoint() = %v, want %v", got, false)
72+
}
73+
}

cmd/heartbeat/health/healthtest/healthtest.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package healthtest
33
import (
44
"net/http"
55
"net/http/httptest"
6+
"time"
67
)
78

89
func TestHealthServer(code int) *httptest.Server {
@@ -13,3 +14,12 @@ func TestHealthServer(code int) *httptest.Server {
1314
s := httptest.NewServer(mux)
1415
return s
1516
}
17+
18+
func TestTimeoutServer(timeout time.Duration) *httptest.Server {
19+
mux := http.NewServeMux()
20+
mux.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
21+
time.Sleep(timeout)
22+
}))
23+
s := httptest.NewServer(mux)
24+
return s
25+
}

cmd/heartbeat/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,13 @@ func main() {
7777
rtx.Must(err, "failed to establish a websocket connection with %s", heartbeatURL)
7878

7979
probe := health.NewPortProbe(svcs)
80+
ec := health.NewEndpointClient(static.HealthEndpointTimeout)
8081
hc := &health.Checker{}
8182
if kubernetesURL.URL == nil {
82-
hc = health.NewChecker(probe)
83+
hc = health.NewChecker(probe, ec)
8384
} else {
8485
k8s := health.MustNewKubernetesClient(kubernetesURL.URL, pod, node, namespace, kubernetesAuth)
85-
hc = health.NewCheckerK8S(probe, k8s)
86+
hc = health.NewCheckerK8S(probe, k8s, ec)
8687
}
8788

8889
write(conn, hc, ldr)

static/configs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const (
2020
BackoffMultiplier = 2
2121
BackoffMaxInterval = 5 * time.Minute
2222
BackoffMaxElapsedTime = 0
23+
HealthEndpointTimeout = 5 * time.Second
2324
HeartbeatPeriod = 10 * time.Second
2425
MemorystoreExportPeriod = 10 * time.Second
2526
PrometheusCheckPeriod = time.Minute

0 commit comments

Comments
 (0)