Skip to content

Commit e6a657c

Browse files
oilbeater and claude authored
fix(daemon): handle OVS bridge creation race condition with exchangeLinkName (#6374)
When exchangeLinkName=true, a race condition exists between OVS vswitchd processing the netlink rename notification and the bridge creation: OVS's netdev cache still holds a reference to the old NIC name, causing the bridge's kernel interface to never appear. Additionally, the retry path fails because changeProviderNicName cannot detect the already-renamed state.

Fix by:

1. Making changeProviderNicName idempotent - when the source link is not found, check if the target already exists as a non-OVS interface, indicating a prior successful rename.
2. Adding waitForBridgeInterface to poll for the kernel interface after bridge creation. If it doesn't appear within 5s, delete the stale OVSDB bridge record so the reconciliation loop can retry cleanly.

Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 503c357 commit e6a657c

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

pkg/daemon/init_linux.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ func (c *Controller) changeProviderNicName(current, target string) (bool, error)
5252
link, err := netlink.LinkByName(current)
5353
if err != nil {
5454
if _, ok := err.(netlink.LinkNotFoundError); ok {
55+
// Check if the NIC was already renamed from a previous attempt
56+
targetLink, targetErr := netlink.LinkByName(target)
57+
if targetErr == nil && targetLink.Type() != "openvswitch" {
58+
klog.Infof("link %s already renamed to %s, skip rename", current, target)
59+
return true, nil
60+
}
5561
klog.Infof("link %s not found, skip", current)
5662
return false, nil
5763
}

pkg/daemon/ovs.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,19 @@ func (c *Controller) configExternalBridge(provider, bridge, nic string, exchange
259259
if err != nil {
260260
return fmt.Errorf("failed to create OVS bridge %s, %w: %q", bridge, err, output)
261261
}
262+
263+
if exchangeLinkName {
264+
if err := c.waitForBridgeInterface(bridge, 5*time.Second); err != nil {
265+
// Bridge created in OVSDB but kernel interface not available.
266+
// Delete the stale bridge to allow a clean retry.
267+
klog.Warningf("OVS bridge %s interface not ready, cleaning up: %v", bridge, err)
268+
if output, delErr := ovs.Exec(ovs.IfExists, "del-br", bridge); delErr != nil {
269+
klog.Errorf("failed to delete stale bridge %s: %v, %q", bridge, delErr, output)
270+
}
271+
return err
272+
}
273+
}
274+
262275
if output, err = ovs.Exec("list-ports", bridge); err != nil {
263276
return fmt.Errorf("failed to list ports of OVS bridge %s, %w: %q", bridge, err, output)
264277
}
@@ -307,6 +320,17 @@ func (c *Controller) configExternalBridge(provider, bridge, nic string, exchange
307320
return nil
308321
}
309322

323+
func (c *Controller) waitForBridgeInterface(bridge string, timeout time.Duration) error {
324+
deadline := time.Now().Add(timeout)
325+
for time.Now().Before(deadline) {
326+
if ok, _ := linkExists(bridge); ok {
327+
return nil
328+
}
329+
time.Sleep(100 * time.Millisecond)
330+
}
331+
return fmt.Errorf("timed out waiting for OVS bridge %s kernel interface", bridge)
332+
}
333+
310334
func initProviderChassisMac(provider string) error {
311335
if err := addOvnMapping("ovn-chassis-mac-mappings", provider, util.GenerateMac(), false); err != nil {
312336
klog.Error(err)

0 commit comments

Comments
 (0)