@@ -4,14 +4,13 @@ import (
44 "context"
55 "fmt"
66 "net/netip"
7+ "strings"
78 "sync"
89 "time"
910
1011 smithy "github.com/aws/smithy-go"
1112 "k8s.io/apimachinery/pkg/util/cache"
1213
13- "strings"
14-
1514 awssdk "github.com/aws/aws-sdk-go-v2/aws"
1615 elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
1716 "github.com/go-logr/logr"
@@ -23,7 +22,6 @@ import (
2322 "k8s.io/apimachinery/pkg/util/sets"
2423 "k8s.io/client-go/tools/record"
2524 elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
26- "sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
2725 "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
2826 "sigs.k8s.io/aws-load-balancer-controller/pkg/backend"
2927 ctrlerrors "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
@@ -73,6 +71,9 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB
7371 invalidVpcCache : cache .NewExpiring (),
7472 invalidVpcCacheTTL : defaultTargetsCacheTTL ,
7573
74+ needsPodAZCache : cache .NewExpiring (),
75+ needsPodAZCacheTTL : defaultNeedsPodAZCacheTTL ,
76+
7677 requeueDuration : defaultRequeueDuration ,
7778 }
7879}
@@ -98,6 +99,13 @@ type defaultResourceManager struct {
9899 invalidVpcCacheTTL time.Duration
99100 invalidVpcCacheMutex sync.RWMutex
100101
102+ // needsPodAZCache tracks TGBs that require pod availability zones during target registration.
103+ // When AWS returns an AZ validation error, we cache the TGB key to avoid retrying with AZ="all"
104+ // on subsequent reconciles, improving performance by going directly to pod-specific AZs.
105+ needsPodAZCache * cache.Expiring
106+ needsPodAZCacheTTL time.Duration
107+ needsPodAZCacheMutex sync.RWMutex
108+
101109 requeueDuration time.Duration
102110}
103111
@@ -578,21 +586,38 @@ func (m *defaultResourceManager) registerPodEndpoints(ctx context.Context, tgb *
578586 return err
579587 }
580588
581- svcAnnotations , err := m .getServiceAnnotations (ctx , tgb )
589+ tgbKey := fmt .Sprintf ("%s/%s" , tgb .Namespace , tgb .Name )
590+ m .needsPodAZCacheMutex .RLock ()
591+ _ , needsPodAZ := m .needsPodAZCache .Get (tgbKey )
592+ m .needsPodAZCacheMutex .RUnlock ()
593+
594+ sdkTargets , err := m .prepareRegistrationCall (ctx , endpoints , tgb , overrideAzFn , needsPodAZ )
582595 if err != nil {
583596 return err
584597 }
598+ err = m .targetsManager .RegisterTargets (ctx , tgb , sdkTargets )
599+ if err != nil && isAZValidationError (err ) && ! needsPodAZ {
600+ m .logger .Info ("RegisterTargets failed with AZ validation error, retrying with pod AZs" , "tgb" , tgbKey )
601+ m .needsPodAZCacheMutex .Lock ()
602+ m .needsPodAZCache .Set (tgbKey , true , m .needsPodAZCacheTTL )
603+ m .needsPodAZCacheMutex .Unlock ()
585604
586- sdkTargets , err := m .prepareRegistrationCall (ctx , endpoints , tgb , overrideAzFn , svcAnnotations )
587- if err != nil {
588- return err
605+ sdkTargets , err = m .prepareRegistrationCall (ctx , endpoints , tgb , overrideAzFn , true )
606+ if err != nil {
607+ return err
608+ }
609+ retryErr := m .targetsManager .RegisterTargets (ctx , tgb , sdkTargets )
610+ if retryErr != nil {
611+ m .logger .Error (retryErr , "Failed to register targets even with availability zone specified." , "tgb" , tgbKey )
612+ return retryErr
613+ }
614+ return nil
589615 }
590- return m . targetsManager . RegisterTargets ( ctx , tgb , sdkTargets )
616+ return err
591617}
592618
593- func (m * defaultResourceManager ) prepareRegistrationCall (ctx context.Context , endpoints []backend.PodEndpoint , tgb * elbv2api.TargetGroupBinding , doAzOverride func (addr netip.Addr ) bool , svcAnnotations map [string ]string ) ([]elbv2types.TargetDescription , error ) {
594- isALB := m .isALBIngress (svcAnnotations )
595- crossZoneDisabled := m .isCrossZoneDisabled (svcAnnotations , isALB )
619+ func (m * defaultResourceManager ) prepareRegistrationCall (ctx context.Context , endpoints []backend.PodEndpoint , tgb * elbv2api.TargetGroupBinding , doAzOverride func (addr netip.Addr ) bool , usePodAZ bool ) ([]elbv2types.TargetDescription , error ) {
620+ usingCrossAccount := tgb .Spec .IamRoleArnToAssume != ""
596621
597622 sdkTargets := make ([]elbv2types.TargetDescription , 0 , len (endpoints ))
598623 for _ , endpoint := range endpoints {
@@ -604,8 +629,9 @@ func (m *defaultResourceManager) prepareRegistrationCall(ctx context.Context, en
604629 if err != nil {
605630 return sdkTargets , err
606631 }
607- if doAzOverride (podIP ) {
608- if isALB && crossZoneDisabled && tgb .Spec .IamRoleArnToAssume == "" {
632+ needsAzOverride := doAzOverride (podIP )
633+ if needsAzOverride {
634+ if usePodAZ && ! usingCrossAccount {
609635 az , err := m .getPodAvailabilityZone (ctx , endpoint .Pod )
610636 if err != nil {
611637 return sdkTargets , err
@@ -865,34 +891,12 @@ func (m *defaultResourceManager) getMaxNewTargets(newTargetCount int, currentTar
865891 return newTargetCount
866892}
867893
868- func (m * defaultResourceManager ) getServiceAnnotations (ctx context.Context , tgb * elbv2api.TargetGroupBinding ) (map [string ]string , error ) {
869- svcKey := buildServiceReferenceKey (tgb , tgb .Spec .ServiceRef )
870- svc := & corev1.Service {}
871- if err := m .k8sClient .Get (ctx , svcKey , svc ); err != nil {
872- return nil , err
873- }
874- return svc .Annotations , nil
875- }
876-
877- func (m * defaultResourceManager ) isALBIngress (svcAnnotations map [string ]string ) bool {
878- for key := range svcAnnotations {
879- if strings .HasPrefix (key , annotations .AnnotationPrefixIngress ) {
880- return true
881- }
882- }
883- return false
884- }
885-
886- func (m * defaultResourceManager ) isCrossZoneDisabled (svcAnnotations map [string ]string , isALB bool ) bool {
887- crossZoneDisabled := "load_balancing.cross_zone.enabled=false"
888- if isALB {
889- if attrs , ok := svcAnnotations [annotations .AnnotationPrefixIngress + "/" + annotations .IngressSuffixTargetGroupAttributes ]; ok {
890- return strings .Contains (attrs , crossZoneDisabled )
891- }
892- } else {
893- if attrs , ok := svcAnnotations [annotations .SvcBetaAnnotationPrefix + "/" + annotations .SvcLBSuffixLoadBalancerAttributes ]; ok {
894- return strings .Contains (attrs , crossZoneDisabled )
895- }
894+ func isAZValidationError (err error ) bool {
895+ var apiErr smithy.APIError
896+ if errors .As (err , & apiErr ) {
897+ isMatch := apiErr .ErrorCode () == "ValidationError" &&
898+ strings .Contains (apiErr .ErrorMessage (), "you must specify an Availability Zone" )
899+ return isMatch
896900 }
897901 return false
898902}
@@ -908,10 +912,6 @@ func (m *defaultResourceManager) getPodAvailabilityZone(ctx context.Context, pod
908912 }
909913
910914 az , ok := node .Labels [corev1 .LabelTopologyZone ]
911- // fallback: try legacy/deprecated label failure-domain.beta.kubernetes.io/zone
912- if ! ok {
913- az , ok = node .Labels ["failure-domain.beta.kubernetes.io/zone" ]
914- }
915915 if ! ok {
916916 return "" , errors .Errorf ("node %s has no availability zone label" , pod .NodeName )
917917 }
0 commit comments