@@ -237,7 +237,7 @@ type APIs interface {
237237 GetVpcSubnets (ctx context.Context ) ([]ec2types.Subnet , error )
238238
239239 // IsSubnetExcluded returns if a subnet is excluded for pod IPs based on its tags
240- IsSubnetExcluded (ctx context.Context , subnetID string ) (bool , error )
240+ IsSubnetExcluded (ctx context.Context , subnetID string , isPrimarySubnet bool ) (bool , error )
241241}
242242
243243// EC2InstanceMetadataCache caches instance metadata
@@ -299,6 +299,9 @@ type ENIMetadata struct {
299299
300300 // Network card the ENI is attached on
301301 NetworkCard int
302+
303+ // SubnetID the ENI is created from
304+ SubnetID string
302305}
303306
304307// PrimaryIPv4Address returns the primary IPv4 address of this node
@@ -557,7 +560,12 @@ func (cache *EC2InstanceMetadataCache) discoverCustomSecurityGroups(ctx context.
557560 },
558561 }
559562
560- result , err := cache .ec2SVC .DescribeSecurityGroups (ctx , describeSGInput )
563+ var result * ec2.DescribeSecurityGroupsOutput
564+ err := retry .NWithBackoffCtx (ctx , retry .NewSimpleBackoff (time .Millisecond * 100 , time .Second * 5 , 0.15 , 2.0 ), 5 , func () error {
565+ var err error
566+ result , err = cache .ec2SVC .DescribeSecurityGroups (ctx , describeSGInput )
567+ return err
568+ })
561569 if err != nil {
562570 return nil , fmt .Errorf ("discoverCustomSecurityGroups: unable to describe security groups: %v" , err )
563571 }
@@ -588,14 +596,8 @@ func (cache *EC2InstanceMetadataCache) GetENISubnetID(ctx context.Context, eniID
588596 return * result .NetworkInterfaces [0 ].SubnetId , nil
589597}
590598
591- // Helper function to check if an ENI is in a secondary subnet
592- func (cache * EC2InstanceMetadataCache ) isENIInSecondarySubnet (ctx context.Context , eniID string ) bool {
593- eniSubnetID , err := cache .GetENISubnetID (ctx , eniID )
594- return err == nil && eniSubnetID != cache .subnetID
595- }
596-
597599// Helper function to get ENIs that match specific criteria
598- func (cache * EC2InstanceMetadataCache ) getFilteredENIs (ctx context. Context , store * datastore.DataStore , onlySecondarySubnets bool ) []string {
600+ func (cache * EC2InstanceMetadataCache ) getFilteredENIs (store * datastore.DataStore , onlySecondarySubnets bool ) []string {
599601 eniInfos := store .GetENIInfos ()
600602 var eniIDs []string
601603
@@ -606,7 +608,7 @@ func (cache *EC2InstanceMetadataCache) getFilteredENIs(ctx context.Context, stor
606608 continue
607609 }
608610
609- isSecondarySubnet := cache . isENIInSecondarySubnet ( ctx , eniID )
611+ isSecondarySubnet := eniInfo . SubnetID != cache . subnetID
610612
611613 // Filter based on subnet type
612614 if onlySecondarySubnets != isSecondarySubnet {
@@ -681,12 +683,10 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
681683 if err != nil {
682684 awsAPIErrInc ("DiscoverCustomSecurityGroups" , err )
683685 log .Warnf ("Failed to discover custom security groups: %v. Falling back to using primary security groups for ENIs in secondary subnets" , err )
684-
685- // Clear custom security groups cache to trigger fallback behavior
686- cache .customSecurityGroups .Set ([]string {})
687-
688- // Apply primary security groups to ENIs in secondary subnets as fallback
689- cache .applyFallbackSecurityGroupsForAllDatastores (ctx , dsAccess )
686+ if eventRecorder := eventrecorder .Get (); eventRecorder != nil {
687+ eventRecorder .SendPodEvent (v1 .EventTypeWarning , "FailedCustomSecurityGroupsDiscovery" , "DescribeSecurityGroups" ,
688+ "aws-node failed calling ec2 api to discover custmized security groups for network interfaces from secondary subnets" )
689+ }
690690 return err
691691 }
692692
@@ -698,7 +698,7 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
698698 cache .customSecurityGroups .Set ([]string {})
699699
700700 // Apply primary security groups to ENIs in secondary subnets as fallback
701- cache .applyFallbackSecurityGroupsForAllDatastores (ctx , dsAccess )
701+ cache .applyPrimarySGsToSecondarySubnetENIs (ctx , dsAccess )
702702
703703 return nil
704704 }
@@ -710,16 +710,16 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
710710 if addedCount != 0 || deletedCount != 0 {
711711 var eniIDs []string
712712 for _ , ds := range dsAccess .DataStores {
713- eniIDs = append (eniIDs , cache .getFilteredENIs (ctx , ds , true )... ) // only secondary subnet ENIs
713+ eniIDs = append (eniIDs , cache .getFilteredENIs (ds , true )... ) // only secondary subnet ENIs
714714 }
715715 cache .applySecurityGroupsToENIs (ctx , eniIDs , sgIDs , "Update" )
716716 }
717717
718718 return nil
719719}
720720
721- // applyFallbackSecurityGroupsForAllDatastores applies primary security groups to ENIs in secondary subnets across all datastores
722- func (cache * EC2InstanceMetadataCache ) applyFallbackSecurityGroupsForAllDatastores (ctx context.Context , dsAccess * datastore.DataStoreAccess ) {
721+ // applyPrimarySGsToSecondarySubnetENIs applies primary security groups to ENIs in secondary subnets across all datastores
722+ func (cache * EC2InstanceMetadataCache ) applyPrimarySGsToSecondarySubnetENIs (ctx context.Context , dsAccess * datastore.DataStoreAccess ) {
723723 log .Info ("Applying primary security groups as fallback for ENIs in secondary subnets across all datastores" )
724724
725725 primarySGs := cache .securityGroups .SortedList ()
@@ -729,7 +729,7 @@ func (cache *EC2InstanceMetadataCache) applyFallbackSecurityGroupsForAllDatastor
729729
730730 var eniIDs []string
731731 for _ , ds := range dsAccess .DataStores {
732- eniIDs = append (eniIDs , cache .getFilteredENIs (ctx , ds , true )... ) // only secondary subnet ENIs
732+ eniIDs = append (eniIDs , cache .getFilteredENIs (ds , true )... ) // only secondary subnet ENIs
733733 }
734734
735735 cache .applySecurityGroupsToENIs (ctx , eniIDs , primarySGs , "Applying primary security groups to" )
@@ -753,7 +753,7 @@ func (cache *EC2InstanceMetadataCache) RefreshSGIDs(ctx context.Context, mac str
753753 if cache .useSubnetDiscovery {
754754 for _ , ds := range dsAccess .DataStores {
755755 // Get only primary subnet ENIs (onlySecondarySubnets=false)
756- primarySubnetENIs := cache .getFilteredENIs (ctx , ds , false )
756+ primarySubnetENIs := cache .getFilteredENIs (ds , false )
757757 for _ , eniID := range primarySubnetENIs {
758758 // Filter out unmanaged ENIs
759759 if ! cache .unmanagedENIs .Has (eniID ) {
@@ -878,6 +878,12 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
878878 }
879879 }
880880
881+ subnetID , err := cache .imds .GetSubnetID (ctx , eniMAC )
882+ if err != nil {
883+ awsAPIErrInc ("GetSubnetID" , err )
884+ return ENIMetadata {}, err
885+ }
886+
881887 if ! ipv4Available && ! ipv6Available {
882888 return ENIMetadata {
883889 ENIID : eniID ,
@@ -890,6 +896,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
890896 IPv6Addresses : make ([]ec2types.NetworkInterfaceIpv6Address , 0 ),
891897 IPv6Prefixes : make ([]ec2types.Ipv6PrefixSpecification , 0 ),
892898 NetworkCard : networkCard ,
899+ SubnetID : subnetID ,
893900 }, nil
894901 }
895902
@@ -993,6 +1000,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
9931000 IPv6Addresses : ec2ip6s ,
9941001 IPv6Prefixes : ec2ipv6Prefixes ,
9951002 NetworkCard : networkCard ,
1003+ SubnetID : subnetID ,
9961004 }, nil
9971005}
9981006
@@ -1198,9 +1206,9 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
11981206 log .Info ("Defaulting to same subnet as the primary interface for the new ENI" )
11991207
12001208 // Even in fallback, check if primary subnet is excluded
1201- excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID )
1209+ excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID , true )
12021210 if checkErr != nil {
1203- log . Warnf ("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt." , checkErr )
1211+ return "" , fmt . Errorf ("Failed to check if primary subnet is excluded: %w. Quit ENI creation attempt." , checkErr )
12041212 } else if excluded {
12051213 // Primary subnet is explicitly excluded
12061214 return "" , fmt .Errorf ("primary subnet is tagged with kubernetes.io/role/cni=0 - no valid subnets available for ENI creation" )
@@ -1215,18 +1223,21 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12151223 for _ , subnet := range subnetResult {
12161224 // Check tag for all subnets including primary
12171225 isPrimarySubnet := * subnet .SubnetId == cache .subnetID
1218- if ! validTag (subnet , isPrimarySubnet ) {
1226+ if ! isSubnetValidForENICreation (subnet , isPrimarySubnet ) {
12191227 // Log when primary subnet is excluded
12201228 if isPrimarySubnet {
12211229 log .Infof ("Primary subnet %s is excluded from ENI creation" , cache .subnetID )
12221230 }
12231231 continue
12241232 }
12251233 validSubnetsFound = true
1234+ // preset security groups for ENI with primary SGs
1235+ input .Groups = cache .securityGroups .SortedList ()
12261236 // If this is a secondary subnet and we have custom security groups, use those instead
12271237 // We already determined isPrimarySubnet above, just reuse the variable
12281238 if ! isPrimarySubnet && len (cache .customSecurityGroups .SortedList ()) > 0 {
12291239 log .Infof ("Using custom security groups for ENI in secondary subnet %s" , * subnet .SubnetId )
1240+ // overring SGs if using secondary subnets and sgs
12301241 input .Groups = cache .customSecurityGroups .SortedList ()
12311242 } else if ! isPrimarySubnet {
12321243 // Secondary subnet but no custom security groups available - use primary SGs as fallback
@@ -1249,7 +1260,7 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12491260 } else {
12501261 log .Info ("Using same security group config as the primary interface for the new ENI" )
12511262 // When subnet discovery is disabled, check if primary subnet is excluded
1252- excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID )
1263+ excluded , checkErr := cache .IsSubnetExcluded (ctx , cache .subnetID , true )
12531264 if checkErr != nil {
12541265 // If we can't determine exclusion status, log warning and proceed
12551266 log .Warnf ("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt." , checkErr )
@@ -1300,18 +1311,13 @@ func (cache *EC2InstanceMetadataCache) GetVpcSubnets(ctx context.Context) ([]ec2
13001311 return subnetResult .Subnets , nil
13011312}
13021313
1303- // validTag checks if subnet should be used for ENI/IP allocation
1314+ // isSubnetValidForENICreation checks if subnet should be used for ENI/IP allocation
13041315// For primary subnet: include by default (no tag), exclude only if tag value is "0"
13051316// For secondary subnets: exclude by default (no tag), include only if tag exists with non-"0" value
13061317// If the subnet has cluster-specific tags, it will only be used by the matching cluster
1307- func validTag (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
1308- // Get cluster name for cluster-specific tag checks
1309- localClusterName := os .Getenv (clusterNameEnvVar )
1310- localClusterTagKey := clusterTagKeyPrefix + localClusterName
1311-
1318+ func isSubnetValidForENICreation (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
13121319 // Parse subnet tags
13131320 cniTagValue := getTagValue (subnet .Tags , subnetDiscoveryTagKey )
1314- hasClusterTags , belongsToThisCluster := checkClusterTags (subnet .Tags , localClusterTagKey )
13151321
13161322 // Rule 1: CNI tag with value "0" always excludes the subnet
13171323 if cniTagValue == subnetDiscoveryTagValueExcluded {
@@ -1333,7 +1339,7 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13331339 }
13341340
13351341 // Rule 3: Check cluster-specific tags
1336- if ! hasClusterTags || belongsToThisCluster {
1342+ if ValidSubnetForCluster ( subnet , isPrimarySubnet ) {
13371343 return true
13381344 }
13391345
@@ -1342,6 +1348,24 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13421348 return false
13431349}
13441350
1351+ // ValidSubnetForCluster checks if a subnet is valid for use by this cluster
1352+ // For primary subnets, they are always valid for any cluster
1353+ // For secondary subnets, they must either have no cluster tags or have a matching cluster tag
1354+ func ValidSubnetForCluster (subnet ec2types.Subnet , isPrimarySubnet bool ) bool {
1355+ if isPrimarySubnet {
1356+ // Primary subnets are always valid for any cluster
1357+ return true
1358+ }
1359+ // Get cluster name for cluster-specific tag checks
1360+ localClusterName := os .Getenv (clusterNameEnvVar )
1361+ localClusterTagKey := clusterTagKeyPrefix + localClusterName
1362+ hasClusterTags , belongsToThisCluster := checkClusterTags (subnet .Tags , localClusterTagKey )
1363+ if ! hasClusterTags || belongsToThisCluster {
1364+ return true
1365+ }
1366+ return false
1367+ }
1368+
13451369// getTagValue returns the value of a specific tag key, or empty string if not found
13461370func getTagValue (tags []ec2types.Tag , key string ) string {
13471371 for _ , tag := range tags {
@@ -2603,7 +2627,7 @@ func checkAPIErrorAndBroadcastEvent(err error, api string) {
26032627}
26042628
26052629// IsSubnetExcluded checks if a subnet is excluded by examining its kubernetes.io/role/cni tag
2606- func (cache * EC2InstanceMetadataCache ) IsSubnetExcluded (ctx context.Context , subnetID string ) (bool , error ) {
2630+ func (cache * EC2InstanceMetadataCache ) IsSubnetExcluded (ctx context.Context , subnetID string , isPrimarySubnet bool ) (bool , error ) {
26072631 // Get all VPC subnets with their tags
26082632 subnets , err := cache .GetVpcSubnets (ctx )
26092633 if err != nil {
@@ -2613,13 +2637,17 @@ func (cache *EC2InstanceMetadataCache) IsSubnetExcluded(ctx context.Context, sub
26132637 // Find the specific subnet and check its tags
26142638 for _ , subnet := range subnets {
26152639 if * subnet .SubnetId == subnetID {
2640+ if ! ValidSubnetForCluster (subnet , isPrimarySubnet ) {
2641+ log .Debugf ("IsSubnetExcluded: subnet %s doesn't have valid cluster tag" , subnetID )
2642+ return true , nil
2643+ }
26162644 // Check if the subnet has the exclusion tag kubernetes.io/role/cni=0
26172645 for _ , tag := range subnet .Tags {
26182646 if * tag .Key == "kubernetes.io/role/cni" {
26192647 tagValue := * tag .Value
2620- excluded := tagValue == "0"
2621- log .Debugf ("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t" , subnetID , tagValue , excluded )
2622- return excluded , nil
2648+ tagExcluded := tagValue == "0"
2649+ log .Debugf ("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t" , subnetID , tagValue , tagExcluded )
2650+ return tagExcluded , nil
26232651 }
26242652 }
26252653
0 commit comments