Skip to content

Commit 9468534

Browse files
committed
pkg ipamd: gracefully exclude primary ENI if existing pods
1 parent 9d1e3a4 commit 9468534

4 files changed

Lines changed: 241 additions & 15 deletions

File tree

pkg/ipamd/datastore/data_store.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,14 @@ func (ds *DataStore) UnassignPodIPAddress(ipamKey IPAMKey) (e *ENI, ip string, d
12581258
ipamKey, addr.Address, eni.DeviceNumber)
12591259
// Decrement ENI IP usage when a pod is deallocated
12601260
prometheusmetrics.EniIPsInUse.WithLabelValues(eni.ID).Dec()
1261+
1262+
// Check if ENI is excluded and CIDR is now empty - cleanup if needed
1263+
if eni.IsExcludedForPodIPs && availableCidr.AssignedIPAddressesInCidr() == 0 {
1264+
ds.log.Infof("CIDR %s on excluded ENI %s is now empty, scheduling for cleanup", availableCidr.Cidr.String(), eni.ID)
1265+
// Schedule async cleanup of the empty CIDR
1266+
go ds.deallocateEmptyCIDR(eni.ID, availableCidr)
1267+
}
1268+
12611269
return eni, addr.Address, eni.DeviceNumber, originalIPAMMetadata.InterfacesCount, nil
12621270
}
12631271

@@ -1733,3 +1741,66 @@ func (ds *DataStoreAccess) ReadAllDataStores(enableIPv6 bool) error {
17331741
}
17341742
return nil
17351743
}
1744+
1745+
// deallocateEmptyCIDR asynchronously deallocates an empty CIDR from an excluded ENI
1746+
func (ds *DataStore) deallocateEmptyCIDR(eniID string, cidrToCleanup *CidrInfo) {
1747+
cidrStr := cidrToCleanup.Cidr.String()
1748+
ds.log.Infof("Starting async cleanup for empty CIDR %s on excluded ENI %s", cidrStr, eniID)
1749+
1750+
// Add delay to avoid race conditions with pod cleanup
1751+
time.Sleep(5 * time.Second)
1752+
1753+
ds.lock.Lock()
1754+
defer ds.lock.Unlock()
1755+
1756+
// Double-check that the CIDR is still empty and ENI is still excluded
1757+
eni := ds.eniPool[eniID]
1758+
if eni == nil {
1759+
ds.log.Warnf("ENI %s not found during CIDR cleanup", eniID)
1760+
return
1761+
}
1762+
1763+
if !eni.IsExcludedForPodIPs {
1764+
ds.log.Infof("ENI %s is no longer excluded, skipping CIDR cleanup", eniID)
1765+
return
1766+
}
1767+
1768+
// Find the CIDR in the appropriate map using AddressFamily
1769+
var targetCidr *CidrInfo
1770+
var isIPv4 bool = cidrToCleanup.AddressFamily == "4"
1771+
1772+
if isIPv4 {
1773+
targetCidr = eni.AvailableIPv4Cidrs[cidrStr]
1774+
} else {
1775+
targetCidr = eni.IPv6Cidrs[cidrStr]
1776+
}
1777+
1778+
if targetCidr == nil {
1779+
ds.log.Warnf("CIDR %s not found on ENI %s during cleanup", cidrStr, eniID)
1780+
return
1781+
}
1782+
1783+
// Check if CIDR is still empty
1784+
if targetCidr.AssignedIPAddressesInCidr() > 0 {
1785+
ds.log.Infof("CIDR %s on ENI %s is no longer empty, skipping cleanup", cidrStr, eniID)
1786+
return
1787+
}
1788+
1789+
// Remove the empty CIDR from the ENI structure
1790+
ds.log.Infof("Removing empty CIDR %s from excluded ENI %s in datastore", cidrStr, eniID)
1791+
1792+
// Remove the CIDR from the appropriate ENI map
1793+
if isIPv4 {
1794+
delete(eni.AvailableIPv4Cidrs, cidrStr)
1795+
} else {
1796+
delete(eni.IPv6Cidrs, cidrStr)
1797+
}
1798+
1799+
// Update backing store
1800+
if err := ds.writeBackingStoreUnsafe(); err != nil {
1801+
ds.log.Warnf("Failed to write backing store after removing empty CIDR: %v", err)
1802+
// Note: We continue since the CIDR removal from local state was successful
1803+
}
1804+
1805+
ds.log.Infof("Successfully removed empty CIDR %s from excluded ENI %s", cidrStr, eniID)
1806+
}

pkg/ipamd/datastore/data_store_test.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,3 +1968,160 @@ func TestFreeablePrefixesBothIPv4AndIPv6(t *testing.T) {
19681968
freeablePrefixes = ds.FreeablePrefixes("eni-nonexistent")
19691969
assert.Nil(t, freeablePrefixes, "Should return nil for non-existent ENI")
19701970
}
1971+
1972+
func TestDeallocateEmptyCIDR(t *testing.T) {
1973+
ds := NewDataStore(Testlog, NullCheckpoint{}, false, defaultNetworkCard)
1974+
1975+
// Setup test ENI
1976+
eniID := "eni-test-dealloc"
1977+
err := ds.AddENI(eniID, 1, false, false, false)
1978+
assert.NoError(t, err)
1979+
1980+
// Mark ENI as excluded for pod IPs
1981+
err = ds.SetENIExcludedForPodIPs(eniID, true)
1982+
assert.NoError(t, err)
1983+
1984+
t.Run("IPv4 CIDR cleanup on excluded ENI", func(t *testing.T) {
1985+
// Add IPv4 CIDR to ENI
1986+
ipv4Cidr := net.IPNet{IP: net.ParseIP("192.168.1.0"), Mask: net.CIDRMask(24, 32)}
1987+
err = ds.AddIPv4CidrToStore(eniID, ipv4Cidr, false)
1988+
assert.NoError(t, err)
1989+
1990+
// Verify CIDR exists
1991+
eni := ds.eniPool[eniID]
1992+
cidrStr := ipv4Cidr.String()
1993+
cidrInfo, exists := eni.AvailableIPv4Cidrs[cidrStr]
1994+
assert.True(t, exists, "IPv4 CIDR should exist before cleanup")
1995+
1996+
// Call deallocateEmptyCIDR
1997+
ds.deallocateEmptyCIDR(eniID, cidrInfo)
1998+
1999+
// Verify CIDR is removed
2000+
_, exists = eni.AvailableIPv4Cidrs[cidrStr]
2001+
assert.False(t, exists, "IPv4 CIDR should be removed after cleanup")
2002+
})
2003+
2004+
t.Run("IPv6 CIDR cleanup on excluded ENI", func(t *testing.T) {
2005+
// Add IPv6 CIDR to ENI
2006+
ipv6Cidr := net.IPNet{IP: net.ParseIP("2001:db8:1::"), Mask: net.CIDRMask(64, 128)}
2007+
err = ds.AddIPv6CidrToStore(eniID, ipv6Cidr, false)
2008+
assert.NoError(t, err)
2009+
2010+
// Verify CIDR exists
2011+
eni := ds.eniPool[eniID]
2012+
cidrStr := ipv6Cidr.String()
2013+
cidrInfo, exists := eni.IPv6Cidrs[cidrStr]
2014+
assert.True(t, exists, "IPv6 CIDR should exist before cleanup")
2015+
2016+
// Call deallocateEmptyCIDR
2017+
ds.deallocateEmptyCIDR(eniID, cidrInfo)
2018+
2019+
// Verify CIDR is removed
2020+
_, exists = eni.IPv6Cidrs[cidrStr]
2021+
assert.False(t, exists, "IPv6 CIDR should be removed after cleanup")
2022+
})
2023+
2024+
t.Run("Skip cleanup when ENI is not excluded", func(t *testing.T) {
2025+
// Create new non-excluded ENI
2026+
nonExcludedENI := "eni-not-excluded"
2027+
err := ds.AddENI(nonExcludedENI, 1, false, false, false)
2028+
assert.NoError(t, err)
2029+
2030+
// Don't mark as excluded (default is false)
2031+
2032+
// Add IPv4 CIDR
2033+
ipv4Cidr := net.IPNet{IP: net.ParseIP("192.168.2.0"), Mask: net.CIDRMask(24, 32)}
2034+
err = ds.AddIPv4CidrToStore(nonExcludedENI, ipv4Cidr, false)
2035+
assert.NoError(t, err)
2036+
2037+
// Get CIDR info
2038+
eni := ds.eniPool[nonExcludedENI]
2039+
cidrStr := ipv4Cidr.String()
2040+
cidrInfo, exists := eni.AvailableIPv4Cidrs[cidrStr]
2041+
assert.True(t, exists, "IPv4 CIDR should exist")
2042+
2043+
// Call deallocateEmptyCIDR - should not remove CIDR since ENI is not excluded
2044+
ds.deallocateEmptyCIDR(nonExcludedENI, cidrInfo)
2045+
2046+
// Verify CIDR still exists
2047+
_, exists = eni.AvailableIPv4Cidrs[cidrStr]
2048+
assert.True(t, exists, "IPv4 CIDR should remain since ENI is not excluded")
2049+
})
2050+
2051+
t.Run("Skip cleanup when CIDR has assigned IPs", func(t *testing.T) {
2052+
// Create fresh datastore for isolated test
2053+
dsIsolated := NewDataStore(Testlog, NullCheckpoint{}, false, defaultNetworkCard)
2054+
2055+
// Create new excluded ENI
2056+
excludedENI := "eni-excluded-with-ips"
2057+
err := dsIsolated.AddENI(excludedENI, 1, false, false, false)
2058+
assert.NoError(t, err)
2059+
2060+
err = dsIsolated.SetENIExcludedForPodIPs(excludedENI, true)
2061+
assert.NoError(t, err)
2062+
2063+
// Add IPv4 CIDR
2064+
ipv4Cidr := net.IPNet{IP: net.ParseIP("192.168.10.0"), Mask: net.CIDRMask(24, 32)}
2065+
err = dsIsolated.AddIPv4CidrToStore(excludedENI, ipv4Cidr, false)
2066+
assert.NoError(t, err)
2067+
2068+
// Manually assign an IP to the CIDR to simulate non-empty state
2069+
eni := dsIsolated.eniPool[excludedENI]
2070+
cidrStr := ipv4Cidr.String()
2071+
cidrInfo, exists := eni.AvailableIPv4Cidrs[cidrStr]
2072+
assert.True(t, exists, "IPv4 CIDR should exist")
2073+
2074+
// Manually mark an IP as assigned in the CIDR
2075+
testIP := net.ParseIP("192.168.10.1")
2076+
cidrInfo.IPAddresses[testIP.String()] = &AddressInfo{
2077+
Address: testIP.String(),
2078+
IPAMKey: IPAMKey{NetworkName: "test", ContainerID: "test-container", IfName: "eth0"},
2079+
IPAMMetadata: IPAMMetadata{K8SPodNamespace: "default", K8SPodName: "test-pod"},
2080+
AssignedTime: time.Now(),
2081+
}
2082+
2083+
// Verify CIDR has assigned IPs
2084+
assert.Greater(t, cidrInfo.AssignedIPAddressesInCidr(), 0, "CIDR should have assigned IPs")
2085+
2086+
// Call deallocateEmptyCIDR - should not remove CIDR since it has assigned IPs
2087+
dsIsolated.deallocateEmptyCIDR(excludedENI, cidrInfo)
2088+
2089+
// Verify CIDR still exists
2090+
_, exists = eni.AvailableIPv4Cidrs[cidrStr]
2091+
assert.True(t, exists, "IPv4 CIDR should remain since it has assigned IPs")
2092+
})
2093+
2094+
t.Run("Handle non-existent ENI gracefully", func(t *testing.T) {
2095+
// Create dummy CIDR info
2096+
dummyCidr := &CidrInfo{
2097+
Cidr: net.IPNet{IP: net.ParseIP("192.168.4.0"), Mask: net.CIDRMask(24, 32)},
2098+
AddressFamily: "4",
2099+
}
2100+
2101+
// Should not panic when ENI doesn't exist
2102+
ds.deallocateEmptyCIDR("eni-nonexistent", dummyCidr)
2103+
})
2104+
2105+
t.Run("Handle non-existent CIDR gracefully", func(t *testing.T) {
2106+
// Create ENI
2107+
testENI := "eni-cidr-test"
2108+
err := ds.AddENI(testENI, 1, false, false, false)
2109+
assert.NoError(t, err)
2110+
2111+
err = ds.SetENIExcludedForPodIPs(testENI, true)
2112+
assert.NoError(t, err)
2113+
2114+
// Create CIDR info that doesn't exist in the ENI
2115+
nonExistentCidr := &CidrInfo{
2116+
Cidr: net.IPNet{IP: net.ParseIP("192.168.5.0"), Mask: net.CIDRMask(24, 32)},
2117+
AddressFamily: "4",
2118+
}
2119+
2120+
// Should handle gracefully when CIDR doesn't exist
2121+
ds.deallocateEmptyCIDR(testENI, nonExistentCidr)
2122+
2123+
// Verify ENI still exists and is unaffected
2124+
_, exists := ds.eniPool[testENI]
2125+
assert.True(t, exists, "ENI should still exist")
2126+
})
2127+
}

pkg/ipamd/ipamd.go

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -601,23 +601,16 @@ func (c *IPAMContext) nodeInit() error {
601601
return err
602602
}
603603

604-
// Check if primary ENI was previously used and re-include if needed
604+
// Handle primary ENI exclusion - always respect exclusion tags
605605
if c.isPrimarySubnetExcluded {
606+
primaryENI := c.awsClient.GetPrimaryENI()
606607
if c.primaryENIWasPreviouslyUsed() {
607-
primaryENI := c.awsClient.GetPrimaryENI()
608-
log.Infof("Primary subnet is tagged for exclusion but primary ENI %s has existing pod IPs, re-including for backward compatibility", primaryENI)
609-
err := c.dataStoreAccess.GetDataStore(DefaultNetworkCardIndex).SetENIExcludedForPodIPs(primaryENI, false)
610-
if err != nil {
611-
log.Warnf("Failed to re-include primary ENI: %v", err)
612-
} else {
613-
c.isPrimarySubnetExcluded = false
614-
}
608+
log.Infof("Primary ENI %s is excluded from pod IP allocation but has existing pod IPs, will cleanup resources as pods terminate", primaryENI)
615609
} else {
616-
// Primary ENI is excluded and has no assigned pods - cleanup unassigned resources
617-
primaryENI := c.awsClient.GetPrimaryENI()
618610
log.Infof("Primary ENI %s is excluded from pod IP allocation, cleaning up unassigned resources", primaryENI)
619-
c.cleanupExcludedPrimaryENI(ctx, primaryENI)
620611
}
612+
// Always cleanup free resources on excluded primary ENI
613+
c.cleanupExcludedPrimaryENI(ctx, primaryENI)
621614
}
622615

623616
if c.enableIPv4 {

pkg/ipamd/ipamd_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2889,16 +2889,21 @@ func TestNodeInitPrimarySubnetExclusionWithExistingPodIPs(t *testing.T) {
28892889
}
28902890
m.k8sClient.Create(ctx, &fakeNode)
28912891

2892+
// Expect cleanup calls for excluded primary ENI (with existing pod IPs)
2893+
// The cleanup function will try to unassign any unassigned IPs/prefixes
2894+
m.awsutils.EXPECT().DeallocIPAddresses(gomock.Any(), primaryENIid, gomock.Any()).Return(nil).AnyTimes()
2895+
m.awsutils.EXPECT().DeallocPrefixAddresses(gomock.Any(), primaryENIid, gomock.Any()).Return(nil).AnyTimes()
2896+
28922897
// Add IPs
28932898
m.awsutils.EXPECT().AllocIPAddresses(gomock.Any(), gomock.Any(), gomock.Any())
28942899
os.Setenv("MY_NODE_NAME", myNodeName)
28952900
err := mockContext.nodeInit()
28962901
assert.NoError(t, err)
28972902

2898-
// Verify that primary ENI was re-included due to existing pod IPs
2899-
assert.False(t, mockContext.isPrimarySubnetExcluded, "Primary subnet should be re-included due to existing pod IPs")
2903+
// Verify that primary ENI exclusion is now always respected
2904+
assert.True(t, mockContext.isPrimarySubnetExcluded, "Primary subnet should remain excluded even with existing pod IPs")
29002905
isExcluded := mockContext.dataStoreAccess.GetDataStore(defaultNetworkCard).IsENIExcludedForPodIPs(primaryENIid)
2901-
assert.False(t, isExcluded, "Primary ENI should not be excluded from pod IP allocation due to existing pod IPs")
2906+
assert.True(t, isExcluded, "Primary ENI should remain excluded from pod IP allocation despite existing pod IPs")
29022907
}
29032908

29042909
func TestNodeInitPrimarySubnetExclusionWithoutExistingPodIPs(t *testing.T) {

0 commit comments

Comments
 (0)