Skip to content

Commit 9c48370

Browse files
oilbeater and claude authored
fix: harden provider network cleanup to prevent orphaned bridges and routes (#6448)
When the host NIC disappears before daemon cleanup runs (e.g., Docker disconnects the node), removeProviderNic() returned early without cleaning up addresses and routes on the OVS bridge, leaving stale kernel routes that cause subnet conflicts for subsequent operations.

When ovn-bridge-mappings has no entry for a provider (e.g., bridge setup failed or daemon restarted), ovsCleanProviderNetwork() returned early without attempting to clean up any orphaned OVS bridge or restore renamed NICs, leaving the node network in a polluted state.

Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dc5ad3b commit 9c48370

File tree

2 files changed

+45
-17
lines changed

2 files changed

+45
-17
lines changed

pkg/daemon/init.go

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -154,16 +154,29 @@ func (c *Controller) ovsCleanProviderNetwork(provider string) error {
154154

155155
brName := mappings[provider]
156156
if brName == "" {
157-
return nil
157+
// The mapping may have been cleared before cleanup finished (e.g., daemon restart
158+
// or race with bridge setup failure). Fall back to the default bridge name to clean
159+
// up any orphaned bridge and restore the original NIC name.
160+
brName = util.ExternalBridgeName(provider)
161+
klog.Infof("no ovn-bridge-mappings entry for provider %s, trying default bridge name %s", provider, brName)
158162
}
159163

160164
output, err := ovs.Exec("list-br")
161165
if err != nil {
162166
return fmt.Errorf("failed to list OVS bridges: %w, %q", err, output)
163167
}
164168

165-
if !slices.Contains(strings.Split(output, "\n"), brName) {
169+
bridges := strings.Split(output, "\n")
170+
if !slices.Contains(bridges, brName) {
166171
klog.V(3).Infof("ovs bridge %s not found", brName)
172+
// Even if no OVS bridge exists, check if a NIC was renamed to br-<provider>
173+
// and needs to be restored (e.g., exchangeLinkName was used but bridge setup failed).
174+
if br := util.ExternalBridgeName(provider); br != brName {
175+
if _, err = c.changeProviderNicName(br, brName); err != nil {
176+
klog.Errorf("failed to change provider nic name from %s to %s: %v", br, brName, err)
177+
return err
178+
}
179+
}
167180
return nil
168181
}
169182

pkg/daemon/ovs_linux.go

Lines changed: 30 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1553,14 +1553,18 @@ func (c *Controller) removeProviderNic(nicName, brName string) error {
15531553

15541554
nic, err := netlink.LinkByName(nicName)
15551555
if err != nil {
1556-
if _, ok := err.(netlink.LinkNotFoundError); ok {
1557-
klog.Warningf("failed to get nic by name %s: %v", nicName, err)
1558-
return nil
1556+
if _, ok := err.(netlink.LinkNotFoundError); !ok {
1557+
return fmt.Errorf("failed to get nic by name %s: %w", nicName, err)
15591558
}
1560-
return fmt.Errorf("failed to get nic by name %s: %w", nicName, err)
1559+
klog.Warningf("nic %s not found, will still clean up addresses and routes on bridge %s", nicName, brName)
15611560
}
1561+
15621562
bridge, err := netlink.LinkByName(brName)
15631563
if err != nil {
1564+
if _, ok := err.(netlink.LinkNotFoundError); ok {
1565+
klog.Warningf("bridge %s not found, skip cleanup", brName)
1566+
return nil
1567+
}
15641568
return fmt.Errorf("failed to get bridge by name %s: %w", brName, err)
15651569
}
15661570

@@ -1591,16 +1595,20 @@ func (c *Controller) removeProviderNic(nicName, brName string) error {
15911595
}
15921596
klog.Infof("address %q has been deleted from link %s", addr.String(), brName)
15931597

1594-
addr.Label = ""
1595-
if err = netlink.AddrReplace(nic, &addr); err != nil {
1596-
return fmt.Errorf("failed to replace address %q on nic %s: %w", addr.String(), nicName, err)
1598+
if nic != nil {
1599+
addr.Label = ""
1600+
if err = netlink.AddrReplace(nic, &addr); err != nil {
1601+
return fmt.Errorf("failed to replace address %q on nic %s: %w", addr.String(), nicName, err)
1602+
}
1603+
klog.Infof("address %q has been added/replaced to link %s", addr.String(), nicName)
15971604
}
1598-
klog.Infof("address %q has been added/replaced to link %s", addr.String(), nicName)
15991605
}
16001606

1601-
if err = netlink.LinkSetUp(nic); err != nil {
1602-
klog.Errorf("failed to set link %s up: %v", nicName, err)
1603-
return err
1607+
if nic != nil {
1608+
if err = netlink.LinkSetUp(nic); err != nil {
1609+
klog.Errorf("failed to set link %s up: %v", nicName, err)
1610+
return err
1611+
}
16041612
}
16051613

16061614
for _, scope := range routeScopeOrders {
@@ -1610,11 +1618,18 @@ func (c *Controller) removeProviderNic(nicName, brName string) error {
16101618
continue
16111619
}
16121620
if route.Scope == scope {
1613-
route.LinkIndex = nic.Attrs().Index
1614-
if err = netlink.RouteReplace(&route); err != nil {
1615-
return fmt.Errorf("failed to add/replace route %s: %w", route.String(), err)
1621+
if nic != nil {
1622+
route.LinkIndex = nic.Attrs().Index
1623+
if err = netlink.RouteReplace(&route); err != nil {
1624+
return fmt.Errorf("failed to add/replace route %s: %w", route.String(), err)
1625+
}
1626+
klog.Infof("route %q has been added/replaced to link %s", route.String(), nicName)
1627+
} else {
1628+
if err = netlink.RouteDel(&route); err != nil {
1629+
return fmt.Errorf("failed to delete route %s from bridge %s: %w", route.String(), brName, err)
1630+
}
1631+
klog.Infof("route %q has been deleted from link %s (nic %s not found)", route.String(), brName, nicName)
16161632
}
1617-
klog.Infof("route %q has been added/replaced to link %s", route.String(), nicName)
16181633
}
16191634
}
16201635
}

0 commit comments

Comments
 (0)