@@ -233,7 +233,7 @@ type APIs interface {
233233 GetVpcSubnets (ctx context.Context ) ([]ec2types.Subnet , error )
234234
235235 // IsSubnetExcluded returns if a subnet is excluded for pod IPs based on its tags
236- IsSubnetExcluded (ctx context.Context , subnetID string ) (bool , error )
236+ IsSubnetExcluded (ctx context.Context , subnetID string , isPrimarySubnet bool ) (bool , error )
237237}
238238
239239// EC2InstanceMetadataCache caches instance metadata
@@ -295,6 +295,9 @@ type ENIMetadata struct {
295295
296296 // Network card the ENI is attached on
297297 NetworkCard int
298+
299+ // SubnetID the ENI is created from
300+ SubnetID string
298301}
299302
300303// PrimaryIPv4Address returns the primary IPv4 address of this node
@@ -547,7 +550,12 @@ func (cache *EC2InstanceMetadataCache) discoverCustomSecurityGroups(ctx context.
547550 },
548551 }
549552
550- result , err := cache .ec2SVC .DescribeSecurityGroups (ctx , describeSGInput )
553+ var result * ec2.DescribeSecurityGroupsOutput
554+ err := retry .NWithBackoffCtx (ctx , retry .NewSimpleBackoff (time .Millisecond * 100 , time .Second * 5 , 0.15 , 2.0 ), 5 , func () error {
555+ var err error
556+ result , err = cache .ec2SVC .DescribeSecurityGroups (ctx , describeSGInput )
557+ return err
558+ })
551559 if err != nil {
552560 return nil , fmt .Errorf ("discoverCustomSecurityGroups: unable to describe security groups: %v" , err )
553561 }
@@ -578,14 +586,8 @@ func (cache *EC2InstanceMetadataCache) GetENISubnetID(ctx context.Context, eniID
578586 return * result .NetworkInterfaces [0 ].SubnetId , nil
579587}
580588
581- // Helper function to check if an ENI is in a secondary subnet
582- func (cache * EC2InstanceMetadataCache ) isENIInSecondarySubnet (ctx context.Context , eniID string ) bool {
583- eniSubnetID , err := cache .GetENISubnetID (ctx , eniID )
584- return err == nil && eniSubnetID != cache .subnetID
585- }
586-
587589// Helper function to get ENIs that match specific criteria
588- func (cache * EC2InstanceMetadataCache ) getFilteredENIs (ctx context. Context , store * datastore.DataStore , onlySecondarySubnets bool ) []string {
590+ func (cache * EC2InstanceMetadataCache ) getFilteredENIs (store * datastore.DataStore , onlySecondarySubnets bool ) []string {
589591 eniInfos := store .GetENIInfos ()
590592 var eniIDs []string
591593
@@ -596,7 +598,7 @@ func (cache *EC2InstanceMetadataCache) getFilteredENIs(ctx context.Context, stor
596598 continue
597599 }
598600
599- isSecondarySubnet := cache . isENIInSecondarySubnet ( ctx , eniID )
601+ isSecondarySubnet := eniInfo . SubnetID != cache . subnetID
600602
601603 // Filter based on subnet type
602604 if onlySecondarySubnets != isSecondarySubnet {
@@ -671,12 +673,10 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
671673 if err != nil {
672674 awsAPIErrInc ("DiscoverCustomSecurityGroups" , err )
673675 log .Warnf ("Failed to discover custom security groups: %v. Falling back to using primary security groups for ENIs in secondary subnets" , err )
674-
675- // Clear custom security groups cache to trigger fallback behavior
676- cache .customSecurityGroups .Set ([]string {})
677-
678- // Apply primary security groups to ENIs in secondary subnets as fallback
679- cache .applyFallbackSecurityGroupsForAllDatastores (ctx , dsAccess )
676+ if eventRecorder := eventrecorder .Get (); eventRecorder != nil {
677+ eventRecorder .SendPodEvent (v1 .EventTypeWarning , "FailedCustomSecurityGroupsDiscovery" , "DescribeSecurityGroups" ,
678+ "aws-node failed calling ec2 api to discover custmized security groups for network interfaces from secondary subnets" )
679+ }
680680 return err
681681 }
682682
@@ -688,7 +688,7 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
688688 cache .customSecurityGroups .Set ([]string {})
689689
690690 // Apply primary security groups to ENIs in secondary subnets as fallback
691- cache .applyFallbackSecurityGroupsForAllDatastores (ctx , dsAccess )
691+ cache .applyPrimarySGsToSecondarySubnetENIs (ctx , dsAccess )
692692
693693 return nil
694694 }
@@ -700,16 +700,16 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
700700 if addedCount != 0 || deletedCount != 0 {
701701 var eniIDs []string
702702 for _ , ds := range dsAccess .DataStores {
703- eniIDs = append (eniIDs , cache .getFilteredENIs (ctx , ds , true )... ) // only secondary subnet ENIs
703+ eniIDs = append (eniIDs , cache .getFilteredENIs (ds , true )... ) // only secondary subnet ENIs
704704 }
705705 cache .applySecurityGroupsToENIs (ctx , eniIDs , sgIDs , "Update" )
706706 }
707707
708708 return nil
709709}
710710
711- // applyFallbackSecurityGroupsForAllDatastores applies primary security groups to ENIs in secondary subnets across all datastores
712- func (cache * EC2InstanceMetadataCache ) applyFallbackSecurityGroupsForAllDatastores (ctx context.Context , dsAccess * datastore.DataStoreAccess ) {
711+ // applyPrimarySGsToSecondarySubnetENIs applies primary security groups to ENIs in secondary subnets across all datastores
712+ func (cache * EC2InstanceMetadataCache ) applyPrimarySGsToSecondarySubnetENIs (ctx context.Context , dsAccess * datastore.DataStoreAccess ) {
713713 log .Info ("Applying primary security groups as fallback for ENIs in secondary subnets across all datastores" )
714714
715715 primarySGs := cache .securityGroups .SortedList ()
@@ -719,7 +719,7 @@ func (cache *EC2InstanceMetadataCache) applyFallbackSecurityGroupsForAllDatastor
719719
720720 var eniIDs []string
721721 for _ , ds := range dsAccess .DataStores {
722- eniIDs = append (eniIDs , cache .getFilteredENIs (ctx , ds , true )... ) // only secondary subnet ENIs
722+ eniIDs = append (eniIDs , cache .getFilteredENIs (ds , true )... ) // only secondary subnet ENIs
723723 }
724724
725725 cache .applySecurityGroupsToENIs (ctx , eniIDs , primarySGs , "Applying primary security groups to" )
@@ -743,7 +743,7 @@ func (cache *EC2InstanceMetadataCache) RefreshSGIDs(ctx context.Context, mac str
743743 if cache .useSubnetDiscovery {
744744 for _ , ds := range dsAccess .DataStores {
745745 // Get only primary subnet ENIs (onlySecondarySubnets=false)
746- primarySubnetENIs := cache .getFilteredENIs (ctx , ds , false )
746+ primarySubnetENIs := cache .getFilteredENIs (ds , false )
747747 for _ , eniID := range primarySubnetENIs {
748748 // Filter out unmanaged ENIs
749749 if ! cache .unmanagedENIs .Has (eniID ) {
@@ -868,6 +868,12 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
868868 }
869869 }
870870
871+ subnetID , err := cache .imds .GetSubnetID (ctx , eniMAC )
872+ if err != nil {
873+ awsAPIErrInc ("GetSubnetID" , err )
874+ return ENIMetadata {}, err
875+ }
876+
871877 if ! ipv4Available && ! ipv6Available {
872878 return ENIMetadata {
873879 ENIID : eniID ,
@@ -880,6 +886,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
880886 IPv6Addresses : make ([]ec2types.NetworkInterfaceIpv6Address , 0 ),
881887 IPv6Prefixes : make ([]ec2types.Ipv6PrefixSpecification , 0 ),
882888 NetworkCard : networkCard ,
889+ SubnetID : subnetID ,
883890 }, nil
884891 }
885892
@@ -983,6 +990,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
983990 IPv6Addresses : ec2ip6s ,
984991 IPv6Prefixes : ec2ipv6Prefixes ,
985992 NetworkCard : networkCard ,
993+ SubnetID : subnetID ,
986994 }, nil
987995}
988996
@@ -1188,9 +1196,9 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
11881196 log .Info ("Defaulting to same subnet as the primary interface for the new ENI" )
11891197
11901198 // Even in fallback, check if primary subnet is excluded
1191- excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID )
1199+ excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID , true )
11921200 if checkErr != nil {
1193- log . Warnf ("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt." , checkErr )
1201+ return "" , fmt . Errorf ("Failed to check if primary subnet is excluded: %w. Quit ENI creation attempt." , checkErr )
11941202 } else if excluded {
11951203 // Primary subnet is explicitly excluded
11961204 return "" , fmt .Errorf ("primary subnet is tagged with kubernetes.io/role/cni=0 - no valid subnets available for ENI creation" )
@@ -1205,18 +1213,21 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12051213 for _ , subnet := range subnetResult {
12061214 // Check tag for all subnets including primary
12071215 isPrimarySubnet := * subnet .SubnetId == cache .subnetID
1208- if ! validTag (subnet , isPrimarySubnet ) {
1216+ if ! isSubnetValidForENICreation (subnet , isPrimarySubnet ) {
12091217 // Log when primary subnet is excluded
12101218 if isPrimarySubnet {
12111219 log .Infof ("Primary subnet %s is excluded from ENI creation" , cache .subnetID )
12121220 }
12131221 continue
12141222 }
12151223 validSubnetsFound = true
1224+ // preset security groups for ENI with primary SGs
1225+ input .Groups = cache .securityGroups .SortedList ()
12161226 // If this is a secondary subnet and we have custom security groups, use those instead
12171227 // We already determined isPrimarySubnet above, just reuse the variable
12181228 if ! isPrimarySubnet && len (cache .customSecurityGroups .SortedList ()) > 0 {
12191229 log .Infof ("Using custom security groups for ENI in secondary subnet %s" , * subnet .SubnetId )
1230+ // overring SGs if using secondary subnets and sgs
12201231 input .Groups = cache .customSecurityGroups .SortedList ()
12211232 } else if ! isPrimarySubnet {
12221233 // Secondary subnet but no custom security groups available - use primary SGs as fallback
@@ -1239,7 +1250,7 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12391250 } else {
12401251 log .Info ("Using same security group config as the primary interface for the new ENI" )
12411252 // When subnet discovery is disabled, check if primary subnet is excluded
1242- excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID )
1253+ excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID , true )
12431254 if checkErr != nil {
12441255 // If we can't determine exclusion status, log warning and proceed
12451256 log .Warnf ("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt." , checkErr )
@@ -1290,18 +1301,13 @@ func (cache *EC2InstanceMetadataCache) GetVpcSubnets(ctx context.Context) ([]ec2
12901301 return subnetResult .Subnets , nil
12911302}
12921303
1293- // validTag checks if subnet should be used for ENI/IP allocation
1304+ // isSubnetValidForENICreation checks if subnet should be used for ENI/IP allocation
12941305// For primary subnet: include by default (no tag), exclude only if tag value is "0"
12951306// For secondary subnets: exclude by default (no tag), include only if tag exists with non-"0" value
12961307// If the subnet has cluster-specific tags, it will only be used by the matching cluster
1297- func validTag (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
1298- // Get cluster name for cluster-specific tag checks
1299- localClusterName := os .Getenv (clusterNameEnvVar )
1300- localClusterTagKey := clusterTagKeyPrefix + localClusterName
1301-
1308+ func isSubnetValidForENICreation (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
13021309 // Parse subnet tags
13031310 cniTagValue := getTagValue (subnet .Tags , subnetDiscoveryTagKey )
1304- hasClusterTags , belongsToThisCluster := checkClusterTags (subnet .Tags , localClusterTagKey )
13051311
13061312 // Rule 1: CNI tag with value "0" always excludes the subnet
13071313 if cniTagValue == subnetDiscoveryTagValueExcluded {
@@ -1323,7 +1329,7 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13231329 }
13241330
13251331 // Rule 3: Check cluster-specific tags
1326- if ! hasClusterTags || belongsToThisCluster {
1332+ if ValidSubnetForCluster ( subnet , isPrimarySubnet ) {
13271333 return true
13281334 }
13291335
@@ -1332,6 +1338,21 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13321338 return false
13331339}
13341340
1341+ func ValidSubnetForCluster (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
1342+ if isPrimarySubnet {
1343+ // Primary subnets are always valid for any cluster
1344+ return true
1345+ }
1346+ // Get cluster name for cluster-specific tag checks
1347+ localClusterName := os .Getenv (clusterNameEnvVar )
1348+ localClusterTagKey := clusterTagKeyPrefix + localClusterName
1349+ hasClusterTags , belongsToThisCluster := checkClusterTags (subnet .Tags , localClusterTagKey )
1350+ if ! hasClusterTags || belongsToThisCluster {
1351+ return true
1352+ }
1353+ return false
1354+ }
1355+
13351356// getTagValue returns the value of a specific tag key, or empty string if not found
13361357func getTagValue (tags []ec2types.Tag , key string ) string {
13371358 for _ , tag := range tags {
@@ -2593,7 +2614,7 @@ func checkAPIErrorAndBroadcastEvent(err error, api string) {
25932614}
25942615
25952616// IsSubnetExcluded checks if a subnet is excluded by examining its kubernetes.io/role/cni tag
2596- func (cache * EC2InstanceMetadataCache ) IsSubnetExcluded (ctx context.Context , subnetID string ) (bool , error ) {
2617+ func (cache * EC2InstanceMetadataCache ) IsSubnetExcluded (ctx context.Context , subnetID string , isPrimarySubnet bool ) (bool , error ) {
25972618 // Get all VPC subnets with their tags
25982619 subnets , err := cache .GetVpcSubnets (ctx )
25992620 if err != nil {
@@ -2603,13 +2624,14 @@ func (cache *EC2InstanceMetadataCache) IsSubnetExcluded(ctx context.Context, sub
26032624 // Find the specific subnet and check its tags
26042625 for _ , subnet := range subnets {
26052626 if * subnet .SubnetId == subnetID {
2627+ validForCluster := ValidSubnetForCluster (subnet , isPrimarySubnet )
26062628 // Check if the subnet has the exclusion tag kubernetes.io/role/cni=0
26072629 for _ , tag := range subnet .Tags {
26082630 if * tag .Key == "kubernetes.io/role/cni" {
26092631 tagValue := * tag .Value
2610- excluded := tagValue == "0"
2611- log .Debugf ("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t" , subnetID , tagValue , excluded )
2612- return excluded , nil
2632+ tagExcluded := tagValue == "0"
2633+ log .Debugf ("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t, clusterValid=%t " , subnetID , tagValue , tagExcluded , validForCluster )
2634+ return tagExcluded && validForCluster , nil
26132635 }
26142636 }
26152637
0 commit comments