Skip to content

Commit 0ae5b79

Browse files
authored
Merge pull request #622 from JoelSpeed/kube-proxy-health-checks
Update health checks to use Kube-Proxy when no other configuration is provided
2 parents 7a03d4d + 532b956 commit 0ae5b79

File tree

4 files changed

+189
-9
lines changed

4 files changed

+189
-9
lines changed

pkg/providers/v1/aws.go

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,6 +2083,7 @@ func (c *Cloud) buildNLBHealthCheckConfiguration(svc *v1.Service) (healthCheckCo
20832083
UnhealthyThreshold: 2,
20842084
}
20852085
}
2086+
20862087
if parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckProtocol, &hc.Protocol) {
20872088
hc.Protocol = strings.ToUpper(hc.Protocol)
20882089
}
@@ -2097,6 +2098,31 @@ func (c *Cloud) buildNLBHealthCheckConfiguration(svc *v1.Service) (healthCheckCo
20972098

20982099
parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckPort, &hc.Port)
20992100

2101+
switch c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode {
2102+
case config.ClusterServiceLoadBalancerHealthProbeModeShared:
2103+
// For a non-local service, we override the health check to use the kube-proxy port when no other overrides are provided.
2104+
// The kube-proxy port should be open on all nodes and allows the health check to check the node's ability to proxy traffic.
2105+
// When the node is shutting down, the health check should fail before the node loses the ability to route traffic to the backend pod.
2106+
// This allows the load balancer to gracefully drain connections from the node.
2107+
if svc.Spec.ExternalTrafficPolicy != v1.ServiceExternalTrafficPolicyTypeLocal {
2108+
hc.Path = defaultKubeProxyHealthCheckPath
2109+
if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath != "" {
2110+
hc.Path = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath
2111+
}
2112+
2113+
hc.Port = strconv.Itoa(int(defaultKubeProxyHealthCheckPort))
2114+
if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort != 0 {
2115+
hc.Port = strconv.Itoa(int(c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort))
2116+
}
2117+
2118+
hc.Protocol = elbv2.ProtocolEnumHttp
2119+
}
2120+
case config.ClusterServiceLoadBalancerHealthProbeModeServiceNodePort, "":
2121+
// Configuration is already up to date as this is the default case.
2122+
default:
2123+
return healthCheckConfig{}, fmt.Errorf("Unsupported ClusterServiceLoadBalancerHealthProbeMode %v", c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode)
2124+
}
2125+
21002126
if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCInterval, &hc.Interval); err != nil {
21012127
return healthCheckConfig{}, err
21022128
}
@@ -2495,15 +2521,33 @@ func (c *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, apiS
24952521
}
24962522
} else {
24972523
klog.V(4).Infof("service %v does not need custom health checks", apiService.Name)
2524+
var hcPath string
2525+
hcPort := tcpHealthCheckPort
2526+
24982527
annotationProtocol := strings.ToLower(annotations[ServiceAnnotationLoadBalancerBEProtocol])
24992528
var hcProtocol string
25002529
if annotationProtocol == "https" || annotationProtocol == "ssl" {
25012530
hcProtocol = "SSL"
25022531
} else {
25032532
hcProtocol = "TCP"
25042533
}
2505-
// there must be no path on TCP health check
2506-
err = c.ensureLoadBalancerHealthCheck(loadBalancer, hcProtocol, tcpHealthCheckPort, "", annotations)
2534+
2535+
if c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode == config.ClusterServiceLoadBalancerHealthProbeModeShared {
2536+
// Use the kube-proxy port as the health check port for non-local services.
2537+
hcProtocol = "HTTP"
2538+
hcPath = defaultKubeProxyHealthCheckPath
2539+
hcPort = int32(defaultKubeProxyHealthCheckPort)
2540+
2541+
if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath != "" {
2542+
hcPath = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath
2543+
}
2544+
2545+
if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort != 0 {
2546+
hcPort = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort
2547+
}
2548+
}
2549+
2550+
err = c.ensureLoadBalancerHealthCheck(loadBalancer, hcProtocol, hcPort, hcPath, annotations)
25072551
if err != nil {
25082552
return nil, err
25092553
}

pkg/providers/v1/aws_loadbalancer.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ var (
6868
defaultHealthCheckPort = "traffic-port"
6969
defaultHealthCheckPath = "/"
7070

71+
defaultKubeProxyHealthCheckPort = 10256
72+
defaultKubeProxyHealthCheckPath = "/healthz"
73+
7174
// Defaults for ELB Target operations
7275
defaultRegisterTargetsChunkSize = 100
7376
defaultDeregisterTargetsChunkSize = 100

pkg/providers/v1/aws_test.go

Lines changed: 118 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3223,11 +3223,12 @@ func TestCloud_sortELBSecurityGroupList(t *testing.T) {
32233223

32243224
func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) {
32253225
tests := []struct {
3226-
name string
3227-
annotations map[string]string
3228-
service *v1.Service
3229-
want healthCheckConfig
3230-
wantError bool
3226+
name string
3227+
annotations map[string]string
3228+
service *v1.Service
3229+
modifyConfig func(*config.CloudConfig)
3230+
want healthCheckConfig
3231+
wantError bool
32313232
}{
32323233
{
32333234
name: "default cluster",
@@ -3259,6 +3260,110 @@ func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) {
32593260
},
32603261
wantError: false,
32613262
},
3263+
{
3264+
name: "default cluster with shared health check",
3265+
annotations: map[string]string{},
3266+
service: &v1.Service{
3267+
ObjectMeta: metav1.ObjectMeta{
3268+
Name: "test-svc",
3269+
UID: "UID",
3270+
},
3271+
Spec: v1.ServiceSpec{
3272+
Ports: []v1.ServicePort{
3273+
{
3274+
Name: "http",
3275+
Protocol: v1.ProtocolTCP,
3276+
Port: 8080,
3277+
TargetPort: intstr.FromInt(8880),
3278+
NodePort: 32205,
3279+
},
3280+
},
3281+
},
3282+
},
3283+
modifyConfig: func(cfg *config.CloudConfig) {
3284+
cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared
3285+
},
3286+
want: healthCheckConfig{
3287+
Port: "10256",
3288+
Protocol: elbv2.ProtocolEnumHttp,
3289+
Path: "/healthz",
3290+
Interval: 30,
3291+
Timeout: 10,
3292+
HealthyThreshold: 3,
3293+
UnhealthyThreshold: 3,
3294+
},
3295+
wantError: false,
3296+
},
3297+
{
3298+
name: "default cluster with shared health check and custom port",
3299+
annotations: map[string]string{},
3300+
service: &v1.Service{
3301+
ObjectMeta: metav1.ObjectMeta{
3302+
Name: "test-svc",
3303+
UID: "UID",
3304+
},
3305+
Spec: v1.ServiceSpec{
3306+
Ports: []v1.ServicePort{
3307+
{
3308+
Name: "http",
3309+
Protocol: v1.ProtocolTCP,
3310+
Port: 8080,
3311+
TargetPort: intstr.FromInt(8880),
3312+
NodePort: 32205,
3313+
},
3314+
},
3315+
},
3316+
},
3317+
modifyConfig: func(cfg *config.CloudConfig) {
3318+
cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared
3319+
cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort = 8080
3320+
},
3321+
want: healthCheckConfig{
3322+
Port: "8080",
3323+
Protocol: elbv2.ProtocolEnumHttp,
3324+
Path: "/healthz",
3325+
Interval: 30,
3326+
Timeout: 10,
3327+
HealthyThreshold: 3,
3328+
UnhealthyThreshold: 3,
3329+
},
3330+
wantError: false,
3331+
},
3332+
{
3333+
name: "default cluster with shared health check and custom path",
3334+
annotations: map[string]string{},
3335+
service: &v1.Service{
3336+
ObjectMeta: metav1.ObjectMeta{
3337+
Name: "test-svc",
3338+
UID: "UID",
3339+
},
3340+
Spec: v1.ServiceSpec{
3341+
Ports: []v1.ServicePort{
3342+
{
3343+
Name: "http",
3344+
Protocol: v1.ProtocolTCP,
3345+
Port: 8080,
3346+
TargetPort: intstr.FromInt(8880),
3347+
NodePort: 32205,
3348+
},
3349+
},
3350+
},
3351+
},
3352+
modifyConfig: func(cfg *config.CloudConfig) {
3353+
cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared
3354+
cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath = "/custom-healthz"
3355+
},
3356+
want: healthCheckConfig{
3357+
Port: "10256",
3358+
Protocol: elbv2.ProtocolEnumHttp,
3359+
Path: "/custom-healthz",
3360+
Interval: 30,
3361+
Timeout: 10,
3362+
HealthyThreshold: 3,
3363+
UnhealthyThreshold: 3,
3364+
},
3365+
wantError: false,
3366+
},
32623367
{
32633368
name: "default local",
32643369
annotations: map[string]string{},
@@ -3500,7 +3605,14 @@ func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) {
35003605

35013606
for _, tt := range tests {
35023607
t.Run(tt.name, func(t *testing.T) {
3503-
c := &Cloud{}
3608+
c := &Cloud{
3609+
cfg: &config.CloudConfig{},
3610+
}
3611+
3612+
if tt.modifyConfig != nil {
3613+
tt.modifyConfig(c.cfg)
3614+
}
3615+
35043616
hc, err := c.buildNLBHealthCheckConfiguration(tt.service)
35053617
if !tt.wantError {
35063618
assert.Equal(t, tt.want, hc)

pkg/providers/v1/config/config.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,23 @@ package config
22

33
import (
44
"fmt"
5-
"github.com/aws/aws-sdk-go/aws/request"
65
"strings"
76

7+
"github.com/aws/aws-sdk-go/aws/request"
8+
89
"github.com/aws/aws-sdk-go/aws/endpoints"
910

1011
"k8s.io/klog/v2"
1112
)
1213

14+
const (
15+
// ClusterServiceLoadBalancerHealthProbeModeShared is the shared health probe mode for cluster service load balancer.
16+
ClusterServiceLoadBalancerHealthProbeModeShared = "Shared"
17+
18+
// ClusterServiceLoadBalancerHealthProbeModeServiceNodePort is the service node port health probe mode for cluster service load balancer.
19+
ClusterServiceLoadBalancerHealthProbeModeServiceNodePort = "ServiceNodePort"
20+
)
21+
1322
// CloudConfig wraps the settings for the AWS cloud provider.
1423
// NOTE: Cloud config files should follow the same Kubernetes deprecation policy as
1524
// flags or CLIs. Config fields should not change behavior in incompatible ways and
@@ -62,6 +71,18 @@ type CloudConfig struct {
6271

6372
// NodeIPFamilies determines which IP addresses are added to node objects and their ordering.
6473
NodeIPFamilies []string
74+
75+
// ClusterServiceLoadBalancerHealthProbeMode determines the health probe mode for cluster service load balancer.
76+
// Supported values are `Shared` and `ServiceNodePort`.
77+
// `ServiceNodePort`: the health probe will be created against each port of each service by watching the backend application (default).
78+
// `Shared`: all cluster services share one HTTP probe targeting the kube-proxy on the node (<nodeIP>:10256/healthz).
79+
ClusterServiceLoadBalancerHealthProbeMode string `json:"clusterServiceLoadBalancerHealthProbeMode,omitempty" yaml:"clusterServiceLoadBalancerHealthProbeMode,omitempty"`
80+
81+
// ClusterServiceSharedLoadBalancerHealthProbePort defines the target port of the shared health probe. Defaults to 10256.
82+
ClusterServiceSharedLoadBalancerHealthProbePort int32 `json:"clusterServiceSharedLoadBalancerHealthProbePort,omitempty" yaml:"clusterServiceSharedLoadBalancerHealthProbePort,omitempty"`
83+
84+
// ClusterServiceSharedLoadBalancerHealthProbePath defines the target path of the shared health probe. Defaults to `/healthz`.
85+
ClusterServiceSharedLoadBalancerHealthProbePath string `json:"clusterServiceSharedLoadBalancerHealthProbePath,omitempty" yaml:"clusterServiceSharedLoadBalancerHealthProbePath,omitempty"`
6586
}
6687
// [ServiceOverride "1"]
6788
// Service = s3

0 commit comments

Comments
 (0)