Skip to content

Commit f9865af

Browse files
oilbeater and claude authored
fix(e2e): wait for flow cleanup before checking service after deletion in metallb test (#6359)
The metallb e2e test "should support metallb and underlay combine" fails intermittently with curl exit code 56 (connection reset) when it checks the second service's reachability immediately after deleting the first service. This happens because the OVN LoadBalancer data plane update and the daemon's syncFlows (ovs-ofctl replace-flows) have not yet completed, which briefly disrupts the remaining service's data path.

Add a waitUnderlayServiceFlowCleaned helper that polls all nodes until the deleted service's OpenFlow rules are fully removed from the external bridge, ensuring the data plane has stabilized before the second service is checked.

Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d972147 commit f9865af

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

test/e2e/metallb/e2e_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,11 @@ var _ = framework.Describe("[group:metallb]", func() {
429429
ginkgo.By("Deleting the first service")
430430
serviceClient.DeleteSync(serviceName)
431431

432+
ginkgo.By("Waiting for first service's underlay OpenFlow rules to be cleaned up")
433+
for _, ingress := range service.Status.LoadBalancer.Ingress {
434+
waitUnderlayServiceFlowCleaned(nodeNames, providerNetworkName, ingress.IP, curlListenPort, 15*time.Second)
435+
}
436+
432437
ginkgo.By("Checking the second service is still reachable after first service deletion")
433438
for i, ingress := range service2.Status.LoadBalancer.Ingress {
434439
lbsvcIP2 := ingress.IP
@@ -614,3 +619,21 @@ func waitUnderlayServiceFlow(nodeName, providerNetworkName, serviceIP string, se
614619

615620
return flowFound
616621
}
622+
623+
func waitUnderlayServiceFlowCleaned(nodeNames []string, providerNetworkName, serviceIP string, servicePort int32, timeout time.Duration) {
624+
ginkgo.GinkgoHelper()
625+
626+
bridgeName := util.ExternalBridgeName(providerNetworkName)
627+
matchPort := fmt.Sprintf("tp_dst=%d", servicePort)
628+
629+
framework.WaitUntil(1*time.Second, timeout, func(_ context.Context) (bool, error) {
630+
for _, nodeName := range nodeNames {
631+
cmd := fmt.Sprintf("kubectl ko ofctl %s dump-flows %s | grep -w %s | grep -w %s",
632+
nodeName, bridgeName, serviceIP, matchPort)
633+
if _, err := exec.Command("bash", "-c", cmd).CombinedOutput(); err == nil {
634+
return false, nil // flow still exists on this node
635+
}
636+
}
637+
return true, nil // flow cleaned from all nodes
638+
}, fmt.Sprintf("underlay service flow for %s should be cleaned up", serviceIP))
639+
}

0 commit comments

Comments
 (0)