Skip to content

Commit 6541c3a

Browse files
committed
add cache and modify logic
1 parent b7a32e1 commit 6541c3a

File tree

4 files changed

+359
-265
lines changed

4 files changed

+359
-265
lines changed

pkg/targetgroupbinding/resource_manager.go

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@ import (
44
"context"
55
"fmt"
66
"net/netip"
7+
"strings"
78
"sync"
89
"time"
910

1011
smithy "github.com/aws/smithy-go"
1112
"k8s.io/apimachinery/pkg/util/cache"
1213

13-
"strings"
14-
1514
awssdk "github.com/aws/aws-sdk-go-v2/aws"
1615
elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
1716
"github.com/go-logr/logr"
@@ -23,7 +22,6 @@ import (
2322
"k8s.io/apimachinery/pkg/util/sets"
2423
"k8s.io/client-go/tools/record"
2524
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
26-
"sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
2725
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
2826
"sigs.k8s.io/aws-load-balancer-controller/pkg/backend"
2927
ctrlerrors "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
@@ -73,6 +71,9 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB
7371
invalidVpcCache: cache.NewExpiring(),
7472
invalidVpcCacheTTL: defaultTargetsCacheTTL,
7573

74+
needsPodAZCache: cache.NewExpiring(),
75+
needsPodAZCacheTTL: defaultNeedsPodAZCacheTTL,
76+
7677
requeueDuration: defaultRequeueDuration,
7778
}
7879
}
@@ -98,6 +99,13 @@ type defaultResourceManager struct {
9899
invalidVpcCacheTTL time.Duration
99100
invalidVpcCacheMutex sync.RWMutex
100101

102+
// needsPodAZCache tracks TGBs that require pod availability zones during target registration.
103+
// When AWS returns an AZ validation error, we cache the TGB key to avoid retrying with AZ="all"
104+
// on subsequent reconciles, improving performance by going directly to pod-specific AZs.
105+
needsPodAZCache *cache.Expiring
106+
needsPodAZCacheTTL time.Duration
107+
needsPodAZCacheMutex sync.RWMutex
108+
101109
requeueDuration time.Duration
102110
}
103111

@@ -578,21 +586,38 @@ func (m *defaultResourceManager) registerPodEndpoints(ctx context.Context, tgb *
578586
return err
579587
}
580588

581-
svcAnnotations, err := m.getServiceAnnotations(ctx, tgb)
589+
tgbKey := fmt.Sprintf("%s/%s", tgb.Namespace, tgb.Name)
590+
m.needsPodAZCacheMutex.RLock()
591+
_, needsPodAZ := m.needsPodAZCache.Get(tgbKey)
592+
m.needsPodAZCacheMutex.RUnlock()
593+
594+
sdkTargets, err := m.prepareRegistrationCall(ctx, endpoints, tgb, overrideAzFn, needsPodAZ)
582595
if err != nil {
583596
return err
584597
}
598+
err = m.targetsManager.RegisterTargets(ctx, tgb, sdkTargets)
599+
if err != nil && isAZValidationError(err) && !needsPodAZ {
600+
m.logger.Info("RegisterTargets failed with AZ validation error, retrying with pod AZs", "tgb", tgbKey)
601+
m.needsPodAZCacheMutex.Lock()
602+
m.needsPodAZCache.Set(tgbKey, true, m.needsPodAZCacheTTL)
603+
m.needsPodAZCacheMutex.Unlock()
585604

586-
sdkTargets, err := m.prepareRegistrationCall(ctx, endpoints, tgb, overrideAzFn, svcAnnotations)
587-
if err != nil {
588-
return err
605+
sdkTargets, err = m.prepareRegistrationCall(ctx, endpoints, tgb, overrideAzFn, true)
606+
if err != nil {
607+
return err
608+
}
609+
retryErr := m.targetsManager.RegisterTargets(ctx, tgb, sdkTargets)
610+
if retryErr != nil {
611+
m.logger.Error(retryErr, "Failed to register targets even with availability zone specified.", "tgb", tgbKey)
612+
return retryErr
613+
}
614+
return nil
589615
}
590-
return m.targetsManager.RegisterTargets(ctx, tgb, sdkTargets)
616+
return err
591617
}
592618

593-
func (m *defaultResourceManager) prepareRegistrationCall(ctx context.Context, endpoints []backend.PodEndpoint, tgb *elbv2api.TargetGroupBinding, doAzOverride func(addr netip.Addr) bool, svcAnnotations map[string]string) ([]elbv2types.TargetDescription, error) {
594-
isALB := m.isALBIngress(svcAnnotations)
595-
crossZoneDisabled := m.isCrossZoneDisabled(svcAnnotations, isALB)
619+
func (m *defaultResourceManager) prepareRegistrationCall(ctx context.Context, endpoints []backend.PodEndpoint, tgb *elbv2api.TargetGroupBinding, doAzOverride func(addr netip.Addr) bool, usePodAZ bool) ([]elbv2types.TargetDescription, error) {
620+
usingCrossAccount := tgb.Spec.IamRoleArnToAssume != ""
596621

597622
sdkTargets := make([]elbv2types.TargetDescription, 0, len(endpoints))
598623
for _, endpoint := range endpoints {
@@ -604,8 +629,9 @@ func (m *defaultResourceManager) prepareRegistrationCall(ctx context.Context, en
604629
if err != nil {
605630
return sdkTargets, err
606631
}
607-
if doAzOverride(podIP) {
608-
if isALB && crossZoneDisabled && tgb.Spec.IamRoleArnToAssume == "" {
632+
needsAzOverride := doAzOverride(podIP)
633+
if needsAzOverride {
634+
if usePodAZ && !usingCrossAccount {
609635
az, err := m.getPodAvailabilityZone(ctx, endpoint.Pod)
610636
if err != nil {
611637
return sdkTargets, err
@@ -865,34 +891,12 @@ func (m *defaultResourceManager) getMaxNewTargets(newTargetCount int, currentTar
865891
return newTargetCount
866892
}
867893

868-
func (m *defaultResourceManager) getServiceAnnotations(ctx context.Context, tgb *elbv2api.TargetGroupBinding) (map[string]string, error) {
869-
svcKey := buildServiceReferenceKey(tgb, tgb.Spec.ServiceRef)
870-
svc := &corev1.Service{}
871-
if err := m.k8sClient.Get(ctx, svcKey, svc); err != nil {
872-
return nil, err
873-
}
874-
return svc.Annotations, nil
875-
}
876-
877-
func (m *defaultResourceManager) isALBIngress(svcAnnotations map[string]string) bool {
878-
for key := range svcAnnotations {
879-
if strings.HasPrefix(key, annotations.AnnotationPrefixIngress) {
880-
return true
881-
}
882-
}
883-
return false
884-
}
885-
886-
func (m *defaultResourceManager) isCrossZoneDisabled(svcAnnotations map[string]string, isALB bool) bool {
887-
crossZoneDisabled := "load_balancing.cross_zone.enabled=false"
888-
if isALB {
889-
if attrs, ok := svcAnnotations[annotations.AnnotationPrefixIngress+"/"+annotations.IngressSuffixTargetGroupAttributes]; ok {
890-
return strings.Contains(attrs, crossZoneDisabled)
891-
}
892-
} else {
893-
if attrs, ok := svcAnnotations[annotations.SvcBetaAnnotationPrefix+"/"+annotations.SvcLBSuffixLoadBalancerAttributes]; ok {
894-
return strings.Contains(attrs, crossZoneDisabled)
895-
}
894+
func isAZValidationError(err error) bool {
895+
var apiErr smithy.APIError
896+
if errors.As(err, &apiErr) {
897+
isMatch := apiErr.ErrorCode() == "ValidationError" &&
898+
strings.Contains(apiErr.ErrorMessage(), "you must specify an Availability Zone")
899+
return isMatch
896900
}
897901
return false
898902
}
@@ -908,10 +912,6 @@ func (m *defaultResourceManager) getPodAvailabilityZone(ctx context.Context, pod
908912
}
909913

910914
az, ok := node.Labels[corev1.LabelTopologyZone]
911-
// fallback: try legacy/deprecated label failure-domain.beta.kubernetes.io/zone
912-
if !ok {
913-
az, ok = node.Labels["failure-domain.beta.kubernetes.io/zone"]
914-
}
915915
if !ok {
916916
return "", errors.Errorf("node %s has no availability zone label", pod.NodeName)
917917
}

0 commit comments

Comments
 (0)