Skip to content

Commit 1ba4da9

Browse files
committed
add changes from review
2 parents cdecbbb + 08ef8ed commit 1ba4da9

5 files changed

Lines changed: 268 additions & 51 deletions

File tree

cmd/aws-k8s-agent/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ func _main() int {
144144
}
145145
// Measure node initialization duration
146146
IPAMDNodeInitStartTime := time.Now()
147-
ipamContext, err := ipamd.New(k8sClient, withApiServer)
147+
ctx := context.Background()
148+
ipamContext, err := ipamd.New(ctx, k8sClient, withApiServer)
148149
IPAMDNodeInitDuration := time.Since(IPAMDNodeInitStartTime).Seconds()
149150

150151
if err != nil {
@@ -164,7 +165,7 @@ func _main() int {
164165
}
165166

166167
// Pool manager
167-
go ipamContext.StartNodeIPPoolManager(context.Background())
168+
go ipamContext.StartNodeIPPoolManager(ctx)
168169

169170
if !utils.GetBoolAsStringEnvVar(envDisableMetrics, false) {
170171
// Prometheus metrics

pkg/awsutils/awsutils.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -408,10 +408,7 @@ func (i instrumentedIMDS) GetMetadataWithContext(ctx context.Context, p string)
408408
}
409409

410410
// New creates an EC2InstanceMetadataCache
411-
func New(useSubnetDiscovery, useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) {
412-
// ctx is passed to initWithEC2Metadata func to cancel spawned go-routines when tests are run
413-
ctx := context.Background()
414-
411+
func New(ctx context.Context, useSubnetDiscovery, useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) {
415412
awsconfig, err := awssession.New(ctx)
416413
if err != nil {
417414
return nil, errors.Wrap(err, "failed to create aws session")
@@ -449,7 +446,7 @@ func New(useSubnetDiscovery, useCustomNetworking, disableLeakedENICleanup, v4Ena
449446

450447
// Clean up leaked ENIs in the background
451448
if !disableLeakedENICleanup {
452-
go wait.Forever(func() { cache.cleanUpLeakedENIs(context.Background()) }, time.Hour)
449+
go wait.Forever(func() { cache.cleanUpLeakedENIs(ctx) }, time.Hour)
453450
}
454451
return cache, nil
455452
}
@@ -602,10 +599,7 @@ func (cache *EC2InstanceMetadataCache) getFilteredENIs(ctx context.Context, stor
602599
isSecondarySubnet := cache.isENIInSecondarySubnet(ctx, eniID)
603600

604601
// Filter based on subnet type
605-
if onlySecondarySubnets && !isSecondarySubnet {
606-
continue
607-
} else if !onlySecondarySubnets && isSecondarySubnet {
608-
// When we want primary subnet ENIs, skip secondary subnet ENIs
602+
if onlySecondarySubnets != isSecondarySubnet {
609603
continue
610604
}
611605

@@ -676,8 +670,7 @@ func (cache *EC2InstanceMetadataCache) RefreshCustomSGIDs(ctx context.Context, d
676670
sgIDs, err := cache.discoverCustomSecurityGroups(ctx)
677671
if err != nil {
678672
awsAPIErrInc("DiscoverCustomSecurityGroups", err)
679-
log.Warnf("Failed to discover custom security groups: %v", err)
680-
log.Info("Falling back to using primary security groups for ENIs in secondary subnets")
673+
log.Warnf("Failed to discover custom security groups: %v. Falling back to using primary security groups for ENIs in secondary subnets", err)
681674

682675
// Clear custom security groups cache to trigger fallback behavior
683676
cache.customSecurityGroups.Set([]string{})

pkg/ipamd/datastore/data_store.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,9 +1773,6 @@ func (ds *DataStore) deallocateEmptyCIDR(eniID string, cidrToCleanup *CidrInfo)
17731773
cidrStr := cidrToCleanup.Cidr.String()
17741774
ds.log.Infof("Starting async cleanup for empty CIDR %s on excluded ENI %s", cidrStr, eniID)
17751775

1776-
// Add delay to avoid race conditions with pod cleanup
1777-
time.Sleep(5 * time.Second)
1778-
17791776
ds.lock.Lock()
17801777
defer ds.lock.Unlock()
17811778

pkg/ipamd/ipamd.go

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,8 @@ func (c *IPAMContext) inInsufficientCidrCoolingPeriod() bool {
394394

395395
// New retrieves IP address usage information from Instance MetaData service and Kubelet
396396
// then initializes IP address pool data store
397-
func New(k8sClient client.Client, withApiServer bool) (*IPAMContext, error) {
397+
func New(ctx context.Context, k8sClient client.Client, withApiServer bool) (*IPAMContext, error) {
398398
prometheusRegister()
399-
ctx := context.Background()
400399
c := &IPAMContext{}
401400
c.k8sClient = k8sClient
402401
c.networkClient = networkutils.New()
@@ -407,7 +406,7 @@ func New(k8sClient client.Client, withApiServer bool) (*IPAMContext, error) {
407406
c.enableIPv4 = isIPv4Enabled()
408407
c.enableIPv6 = isIPv6Enabled()
409408
c.disableENIProvisioning = disableENIProvisioning()
410-
client, err := awsutils.New(c.useSubnetDiscovery, c.useCustomNetworking, disableLeakedENICleanup(), c.enableIPv4, c.enableIPv6)
409+
client, err := awsutils.New(ctx, c.useSubnetDiscovery, c.useCustomNetworking, disableLeakedENICleanup(), c.enableIPv4, c.enableIPv6)
411410
if err != nil {
412411
return nil, errors.Wrap(err, "ipamd: can not initialize with AWS SDK interface")
413412
}
@@ -446,16 +445,15 @@ func New(k8sClient client.Client, withApiServer bool) (*IPAMContext, error) {
446445
c.myNodeName = os.Getenv(envNodeName)
447446
c.withApiServer = withApiServer
448447

449-
if err := c.nodeInit(); err != nil {
448+
if err := c.nodeInit(ctx); err != nil {
450449
return nil, err
451450
}
452451
return c, nil
453452
}
454453

455-
func (c *IPAMContext) nodeInit() error {
454+
func (c *IPAMContext) nodeInit(ctx context.Context) error {
456455
prometheusmetrics.IpamdActionsInprogress.WithLabelValues("nodeInit").Add(float64(1))
457456
defer prometheusmetrics.IpamdActionsInprogress.WithLabelValues("nodeInit").Sub(float64(1))
458-
ctx := context.TODO()
459457
log.Debugf("Start node init")
460458

461459
if err := c.initENIAndIPLimits(); err != nil {
@@ -498,6 +496,25 @@ func (c *IPAMContext) nodeInit() error {
498496
c.dataStoreAccess = datastore.InitializeDataStores(skipNetworkCards, dsBackingStorePath(), c.enablePrefixDelegation, log)
499497
}
500498

499+
// check if primary ENI is excluded for ipv6 cluster
500+
if c.enableIPv6 && c.useSubnetDiscovery {
501+
for _, eni := range enis {
502+
log.Debugf("check if eni is excluded: %s", eni.ENIID)
503+
if c.excludedENIBasedOnSubnetTags(ctx, eni.ENIID, eni) {
504+
log.Debugf("excluding the eni: %s", eni.ENIID)
505+
enis = lo.Reject(enis, func(e awsutils.ENIMetadata, _ int) bool { return e.ENIID == eni.ENIID })
506+
}
507+
}
508+
if len(enis) == 0 {
509+
log.Debug("the instance has no valid ENI attached")
510+
if err := c.tryAllocateENI(ctx, DefaultNetworkCardIndex); err == nil {
511+
log.Infof("Allocated ENI successfully to Network Card %d", DefaultNetworkCardIndex)
512+
} else {
513+
return fmt.Errorf("Error trying to allocate ENI when primary IPv6 ENI is excluded: %w", err)
514+
}
515+
}
516+
}
517+
501518
for _, eni := range enis {
502519
log.Debugf("Discovered ENI %s, trying to set it up", eni.ENIID)
503520
isTrunkENI := eni.ENIID == metadataResult.TrunkENI
@@ -582,10 +599,10 @@ func (c *IPAMContext) nodeInit() error {
582599
// Refresh security groups and VPC CIDR blocks in the background
583600
// Ignoring errors since we will retry in 30s
584601
go wait.Forever(func() {
585-
c.awsClient.RefreshSGIDs(context.Background(), primaryENIMac, c.dataStoreAccess)
602+
c.awsClient.RefreshSGIDs(ctx, primaryENIMac, c.dataStoreAccess)
586603
// Also refresh custom security groups for secondary subnets
587604
if c.useSubnetDiscovery {
588-
c.awsClient.RefreshCustomSGIDs(context.Background(), c.dataStoreAccess)
605+
c.awsClient.RefreshCustomSGIDs(ctx, c.dataStoreAccess)
589606
}
590607
}, 30*time.Second)
591608
}
@@ -1274,26 +1291,7 @@ func (c *IPAMContext) setupENI(ctx context.Context, eni string, eniMetadata awsu
12741291

12751292
// Check if this ENI (primary or secondary) is in an excluded subnet and mark it for exclusion
12761293
if c.useSubnetDiscovery {
1277-
subnetID, err := c.awsClient.GetENISubnetID(ctx, eni)
1278-
if err != nil {
1279-
log.Warnf("setupENI: failed to get subnet ID for ENI %s: %v", eni, err)
1280-
} else {
1281-
excluded, err := c.awsClient.IsSubnetExcluded(ctx, subnetID)
1282-
if err != nil {
1283-
log.Warnf("setupENI: failed to check subnet exclusion for ENI %s (subnet %s): %v", eni, subnetID, err)
1284-
} else if excluded {
1285-
primaryENI := c.awsClient.GetPrimaryENI()
1286-
eniType := "secondary"
1287-
if eni == primaryENI {
1288-
eniType = "primary"
1289-
}
1290-
log.Infof("Marking %s ENI %s as excluded from pod IP allocation (in excluded subnet %s)", eniType, eni, subnetID)
1291-
err := c.dataStoreAccess.GetDataStore(eniMetadata.NetworkCard).SetENIExcludedForPodIPs(eni, true)
1292-
if err != nil {
1293-
log.Warnf("Failed to mark %s ENI %s as excluded: %v", eniType, eni, err)
1294-
}
1295-
}
1296-
}
1294+
c.excludedENIBasedOnSubnetTags(ctx, eni, eniMetadata)
12971295
}
12981296

12991297
// Store the addressable IP for the ENI
@@ -2519,8 +2517,6 @@ func (c *IPAMContext) cleanupExcludedENI(ctx context.Context, eniID string) {
25192517

25202518
// checkAndHandleAllENIExclusion checks all existing ENIs (primary and secondary) across all network cards for subnet exclusion
25212519
func (c *IPAMContext) checkAndHandleAllENIExclusion(ctx context.Context) {
2522-
log.Debugf("checkAndHandleAllENIExclusion: checking all existing ENIs for subnet exclusion")
2523-
25242520
// Iterate over all datastores (one per network card)
25252521
for _, ds := range c.dataStoreAccess.DataStores {
25262522
// Get ENI information using the public method
@@ -2864,6 +2860,35 @@ func (c *IPAMContext) SetAPIServerConnectivity(connected bool) {
28642860
}
28652861
}
28662862

2863+
// excludedENIBasedOnSubnetTags excludes ENIs in datastore if subnets have valid tags
2864+
func (c *IPAMContext) excludedENIBasedOnSubnetTags(ctx context.Context, eni string, eniMetadata awsutils.ENIMetadata) (excludeENI bool) {
2865+
// Check if this ENI (primary or secondary) is in an excluded subnet and mark it for exclusion
2866+
subnetID, err := c.awsClient.GetENISubnetID(ctx, eni)
2867+
if err != nil {
2868+
log.Warnf("setupENI: failed to get subnet ID for ENI %s: %v", eni, err)
2869+
} else {
2870+
excluded, err := c.awsClient.IsSubnetExcluded(ctx, subnetID)
2871+
if err != nil {
2872+
log.Warnf("setupENI: failed to check subnet exclusion for ENI %s (subnet %s): %v", eni, subnetID, err)
2873+
} else if excluded {
2874+
primaryENI := c.awsClient.GetPrimaryENI()
2875+
eniType := "secondary"
2876+
if eni == primaryENI {
2877+
eniType = "primary"
2878+
}
2879+
if !c.enableIPv6 {
2880+
log.Infof("Marking %s ENI %s as excluded from pod IP allocation (in excluded subnet %s)", eniType, eni, subnetID)
2881+
err := c.dataStoreAccess.GetDataStore(eniMetadata.NetworkCard).SetENIExcludedForPodIPs(eni, true)
2882+
if err != nil {
2883+
log.Warnf("Failed to mark %s ENI %s as excluded: %v", eniType, eni, err)
2884+
}
2885+
}
2886+
excludeENI = true
2887+
}
2888+
}
2889+
return
2890+
}
2891+
28672892
type Decisions struct {
28682893
Stats *datastore.DataStoreStats
28692894
IsLow bool

0 commit comments

Comments
 (0)