Skip to content

Commit 73a0b46

Browse files
committed
updated based on comments
1 parent 1ba4da9 commit 73a0b46

9 files changed

Lines changed: 368 additions & 189 deletions

File tree

pkg/awsutils/awsutils.go

Lines changed: 60 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ type APIs interface {
233233
GetVpcSubnets(ctx context.Context) ([]ec2types.Subnet, error)
234234

235235
// IsSubnetExcluded returns if a subnet is excluded for pod IPs based on its tags
236-
IsSubnetExcluded(ctx context.Context, subnetID string) (bool, error)
236+
IsSubnetExcluded(ctx context.Context, subnetID string, isPrimarySubnet bool) (bool, error)
237237
}
238238

239239
// EC2InstanceMetadataCache caches instance metadata
@@ -295,6 +295,9 @@ type ENIMetadata struct {
295295

296296
// Network card the ENI is attached on
297297
NetworkCard int
298+
299+
// SubnetID the ENI is created from
300+
SubnetID string
298301
}
299302

300303
// PrimaryIPv4Address returns the primary IPv4 address of this node
@@ -547,7 +550,12 @@ func (cache *EC2InstanceMetadataCache) discoverCustomSecurityGroups(ctx context.
547550
},
548551
}
549552

550-
result, err := cache.ec2SVC.DescribeSecurityGroups(ctx, describeSGInput)
553+
var result *ec2.DescribeSecurityGroupsOutput
554+
err := retry.NWithBackoffCtx(ctx, retry.NewSimpleBackoff(time.Millisecond*100, time.Second*5, 0.15, 2.0), 5, func() error {
555+
var err error
556+
result, err = cache.ec2SVC.DescribeSecurityGroups(ctx, describeSGInput)
557+
return err
558+
})
551559
if err != nil {
552560
return nil, fmt.Errorf("discoverCustomSecurityGroups: unable to describe security groups: %v", err)
553561
}
@@ -578,14 +586,8 @@ func (cache *EC2InstanceMetadataCache) GetENISubnetID(ctx context.Context, eniID
578586
return *result.NetworkInterfaces[0].SubnetId, nil
579587
}
580588

581-
// Helper function to check if an ENI is in a secondary subnet
582-
func (cache *EC2InstanceMetadataCache) isENIInSecondarySubnet(ctx context.Context, eniID string) bool {
583-
eniSubnetID, err := cache.GetENISubnetID(ctx, eniID)
584-
return err == nil && eniSubnetID != cache.subnetID
585-
}
586-
587589
// Helper function to get ENIs that match specific criteria
588-
func (cache *EC2InstanceMetadataCache) getFilteredENIs(ctx context.Context, store *datastore.DataStore, onlySecondarySubnets bool) []string {
590+
func (cache *EC2InstanceMetadataCache) getFilteredENIs(store *datastore.DataStore, onlySecondarySubnets bool) []string {
589591
eniInfos := store.GetENIInfos()
590592
var eniIDs []string
591593

@@ -596,7 +598,7 @@ func (cache *EC2InstanceMetadataCache) getFilteredENIs(ctx context.Context, stor
596598
continue
597599
}
598600

599-
isSecondarySubnet := cache.isENIInSecondarySubnet(ctx, eniID)
601+
isSecondarySubnet := eniInfo.SubnetID != cache.subnetID
600602

601603
// Filter based on subnet type
602604
if onlySecondarySubnets != isSecondarySubnet {
@@ -671,12 +673,10 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
671673
if err != nil {
672674
awsAPIErrInc("DiscoverCustomSecurityGroups", err)
673675
log.Warnf("Failed to discover custom security groups: %v. Falling back to using primary security groups for ENIs in secondary subnets", err)
674-
675-
// Clear custom security groups cache to trigger fallback behavior
676-
cache.customSecurityGroups.Set([]string{})
677-
678-
// Apply primary security groups to ENIs in secondary subnets as fallback
679-
cache.applyFallbackSecurityGroupsForAllDatastores(ctx, dsAccess)
676+
if eventRecorder := eventrecorder.Get(); eventRecorder != nil {
677+
eventRecorder.SendPodEvent(v1.EventTypeWarning, "FailedCustomSecurityGroupsDiscovery", "DescribeSecurityGroups",
678+
"aws-node failed calling ec2 api to discover custmized security groups for network interfaces from secondary subnets")
679+
}
680680
return err
681681
}
682682

@@ -688,7 +688,7 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
688688
cache.customSecurityGroups.Set([]string{})
689689

690690
// Apply primary security groups to ENIs in secondary subnets as fallback
691-
cache.applyFallbackSecurityGroupsForAllDatastores(ctx, dsAccess)
691+
cache.applyPrimarySGsToSecondarySubnetENIs(ctx, dsAccess)
692692

693693
return nil
694694
}
@@ -700,16 +700,16 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
700700
if addedCount != 0 || deletedCount != 0 {
701701
var eniIDs []string
702702
for _, ds := range dsAccess.DataStores {
703-
eniIDs = append(eniIDs, cache.getFilteredENIs(ctx, ds, true)...) // only secondary subnet ENIs
703+
eniIDs = append(eniIDs, cache.getFilteredENIs(ds, true)...) // only secondary subnet ENIs
704704
}
705705
cache.applySecurityGroupsToENIs(ctx, eniIDs, sgIDs, "Update")
706706
}
707707

708708
return nil
709709
}
710710

711-
// applyFallbackSecurityGroupsForAllDatastores applies primary security groups to ENIs in secondary subnets across all datastores
712-
func (cache *EC2InstanceMetadataCache) applyFallbackSecurityGroupsForAllDatastores(ctx context.Context, dsAccess *datastore.DataStoreAccess) {
711+
// applyPrimarySGsToSecondarySubnetENIs applies primary security groups to ENIs in secondary subnets across all datastores
712+
func (cache *EC2InstanceMetadataCache) applyPrimarySGsToSecondarySubnetENIs(ctx context.Context, dsAccess *datastore.DataStoreAccess) {
713713
log.Info("Applying primary security groups as fallback for ENIs in secondary subnets across all datastores")
714714

715715
primarySGs := cache.securityGroups.SortedList()
@@ -719,7 +719,7 @@ func (cache *EC2InstanceMetadataCache) applyFallbackSecurityGroupsForAllDatastor
719719

720720
var eniIDs []string
721721
for _, ds := range dsAccess.DataStores {
722-
eniIDs = append(eniIDs, cache.getFilteredENIs(ctx, ds, true)...) // only secondary subnet ENIs
722+
eniIDs = append(eniIDs, cache.getFilteredENIs(ds, true)...) // only secondary subnet ENIs
723723
}
724724

725725
cache.applySecurityGroupsToENIs(ctx, eniIDs, primarySGs, "Applying primary security groups to")
@@ -743,7 +743,7 @@ func (cache *EC2InstanceMetadataCache) RefreshSGIDs(ctx context.Context, mac str
743743
if cache.useSubnetDiscovery {
744744
for _, ds := range dsAccess.DataStores {
745745
// Get only primary subnet ENIs (onlySecondarySubnets=false)
746-
primarySubnetENIs := cache.getFilteredENIs(ctx, ds, false)
746+
primarySubnetENIs := cache.getFilteredENIs(ds, false)
747747
for _, eniID := range primarySubnetENIs {
748748
// Filter out unmanaged ENIs
749749
if !cache.unmanagedENIs.Has(eniID) {
@@ -868,6 +868,12 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
868868
}
869869
}
870870

871+
subnetID, err := cache.imds.GetSubnetID(ctx, eniMAC)
872+
if err != nil {
873+
awsAPIErrInc("GetSubnetID", err)
874+
return ENIMetadata{}, err
875+
}
876+
871877
if !ipv4Available && !ipv6Available {
872878
return ENIMetadata{
873879
ENIID: eniID,
@@ -880,6 +886,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
880886
IPv6Addresses: make([]ec2types.NetworkInterfaceIpv6Address, 0),
881887
IPv6Prefixes: make([]ec2types.Ipv6PrefixSpecification, 0),
882888
NetworkCard: networkCard,
889+
SubnetID: subnetID,
883890
}, nil
884891
}
885892

@@ -983,6 +990,7 @@ func (cache *EC2InstanceMetadataCache) getENIMetadata(eniMAC string) (ENIMetadat
983990
IPv6Addresses: ec2ip6s,
984991
IPv6Prefixes: ec2ipv6Prefixes,
985992
NetworkCard: networkCard,
993+
SubnetID: subnetID,
986994
}, nil
987995
}
988996

@@ -1188,9 +1196,9 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
11881196
log.Info("Defaulting to same subnet as the primary interface for the new ENI")
11891197

11901198
// Even in fallback, check if primary subnet is excluded
1191-
excluded, checkErr := cache.IsSubnetExcluded(ctx, cache.subnetID)
1199+
excluded, checkErr := cache.IsSubnetExcluded(ctx, cache.subnetID, true)
11921200
if checkErr != nil {
1193-
log.Warnf("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt.", checkErr)
1201+
return "", fmt.Errorf("Failed to check if primary subnet is excluded: %w. Quit ENI creation attempt.", checkErr)
11941202
} else if excluded {
11951203
// Primary subnet is explicitly excluded
11961204
return "", fmt.Errorf("primary subnet is tagged with kubernetes.io/role/cni=0 - no valid subnets available for ENI creation")
@@ -1205,18 +1213,21 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12051213
for _, subnet := range subnetResult {
12061214
// Check tag for all subnets including primary
12071215
isPrimarySubnet := *subnet.SubnetId == cache.subnetID
1208-
if !validTag(subnet, isPrimarySubnet) {
1216+
if !isSubnetValidForENICreation(subnet, isPrimarySubnet) {
12091217
// Log when primary subnet is excluded
12101218
if isPrimarySubnet {
12111219
log.Infof("Primary subnet %s is excluded from ENI creation", cache.subnetID)
12121220
}
12131221
continue
12141222
}
12151223
validSubnetsFound = true
1224+
// preset security groups for ENI with primary SGs
1225+
input.Groups = cache.securityGroups.SortedList()
12161226
// If this is a secondary subnet and we have custom security groups, use those instead
12171227
// We already determined isPrimarySubnet above, just reuse the variable
12181228
if !isPrimarySubnet && len(cache.customSecurityGroups.SortedList()) > 0 {
12191229
log.Infof("Using custom security groups for ENI in secondary subnet %s", *subnet.SubnetId)
1230+
// overring SGs if using secondary subnets and sgs
12201231
input.Groups = cache.customSecurityGroups.SortedList()
12211232
} else if !isPrimarySubnet {
12221233
// Secondary subnet but no custom security groups available - use primary SGs as fallback
@@ -1239,7 +1250,7 @@ func (cache *EC2InstanceMetadataCache) createENI(ctx context.Context, sg []*stri
12391250
} else {
12401251
log.Info("Using same security group config as the primary interface for the new ENI")
12411252
// When subnet discovery is disabled, check if primary subnet is excluded
1242-
excluded, checkErr := cache.IsSubnetExcluded(ctx, cache.subnetID)
1253+
excluded, checkErr := cache.IsSubnetExcluded(ctx, cache.subnetID, true)
12431254
if checkErr != nil {
12441255
// If we can't determine exclusion status, log warning and proceed
12451256
log.Warnf("Failed to check if primary subnet is excluded: %v. Proceeding with ENI creation attempt.", checkErr)
@@ -1290,18 +1301,13 @@ func (cache *EC2InstanceMetadataCache) GetVpcSubnets(ctx context.Context) ([]ec2
12901301
return subnetResult.Subnets, nil
12911302
}
12921303

1293-
// validTag checks if subnet should be used for ENI/IP allocation
1304+
// isSubnetValidForENICreation checks if subnet should be used for ENI/IP allocation
12941305
// For primary subnet: include by default (no tag), exclude only if tag value is "0"
12951306
// For secondary subnets: exclude by default (no tag), include only if tag exists with non-"0" value
12961307
// If the subnet has cluster-specific tags, it will only be used by the matching cluster
1297-
func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
1298-
// Get cluster name for cluster-specific tag checks
1299-
localClusterName := os.Getenv(clusterNameEnvVar)
1300-
localClusterTagKey := clusterTagKeyPrefix + localClusterName
1301-
1308+
func isSubnetValidForENICreation(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13021309
// Parse subnet tags
13031310
cniTagValue := getTagValue(subnet.Tags, subnetDiscoveryTagKey)
1304-
hasClusterTags, belongsToThisCluster := checkClusterTags(subnet.Tags, localClusterTagKey)
13051311

13061312
// Rule 1: CNI tag with value "0" always excludes the subnet
13071313
if cniTagValue == subnetDiscoveryTagValueExcluded {
@@ -1323,7 +1329,7 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13231329
}
13241330

13251331
// Rule 3: Check cluster-specific tags
1326-
if !hasClusterTags || belongsToThisCluster {
1332+
if ValidSubnetForCluster(subnet, isPrimarySubnet) {
13271333
return true
13281334
}
13291335

@@ -1332,6 +1338,21 @@ func validTag(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
13321338
return false
13331339
}
13341340

1341+
func ValidSubnetForCluster(subnet ec2types.Subnet, isPrimarySubnet bool) bool {
1342+
if isPrimarySubnet {
1343+
// Primary subnets are always valid for any cluster
1344+
return true
1345+
}
1346+
// Get cluster name for cluster-specific tag checks
1347+
localClusterName := os.Getenv(clusterNameEnvVar)
1348+
localClusterTagKey := clusterTagKeyPrefix + localClusterName
1349+
hasClusterTags, belongsToThisCluster := checkClusterTags(subnet.Tags, localClusterTagKey)
1350+
if !hasClusterTags || belongsToThisCluster {
1351+
return true
1352+
}
1353+
return false
1354+
}
1355+
13351356
// getTagValue returns the value of a specific tag key, or empty string if not found
13361357
func getTagValue(tags []ec2types.Tag, key string) string {
13371358
for _, tag := range tags {
@@ -2593,7 +2614,7 @@ func checkAPIErrorAndBroadcastEvent(err error, api string) {
25932614
}
25942615

25952616
// IsSubnetExcluded checks if a subnet is excluded by examining its kubernetes.io/role/cni tag
2596-
func (cache *EC2InstanceMetadataCache) IsSubnetExcluded(ctx context.Context, subnetID string) (bool, error) {
2617+
func (cache *EC2InstanceMetadataCache) IsSubnetExcluded(ctx context.Context, subnetID string, isPrimarySubnet bool) (bool, error) {
25972618
// Get all VPC subnets with their tags
25982619
subnets, err := cache.GetVpcSubnets(ctx)
25992620
if err != nil {
@@ -2603,13 +2624,14 @@ func (cache *EC2InstanceMetadataCache) IsSubnetExcluded(ctx context.Context, sub
26032624
// Find the specific subnet and check its tags
26042625
for _, subnet := range subnets {
26052626
if *subnet.SubnetId == subnetID {
2627+
validForCluster := ValidSubnetForCluster(subnet, isPrimarySubnet)
26062628
// Check if the subnet has the exclusion tag kubernetes.io/role/cni=0
26072629
for _, tag := range subnet.Tags {
26082630
if *tag.Key == "kubernetes.io/role/cni" {
26092631
tagValue := *tag.Value
2610-
excluded := tagValue == "0"
2611-
log.Debugf("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t", subnetID, tagValue, excluded)
2612-
return excluded, nil
2632+
tagExcluded := tagValue == "0"
2633+
log.Debugf("IsSubnetExcluded: subnet %s has tag kubernetes.io/role/cni=%s, excluded=%t, clusterValid=%t", subnetID, tagValue, tagExcluded, validForCluster)
2634+
return tagExcluded && validForCluster, nil
26132635
}
26142636
}
26152637

0 commit comments

Comments
 (0)