Skip to content

Commit 25a63fa

Browse files
yprokule and claude authored
system-tests: fix IPv6 DAD check order and error propagation (#1294)
Fix two related bugs in IPv6 Duplicate Address Detection (DAD) handling:

1. Check order bug: Functions checked for "tentative" state before "dadfailed" state, preventing detection of permanent DAD failures when both flags were present (e.g., "dadfailed tentative").
2. Error propagation bug: DAD failure errors were not propagated correctly, causing tests to retry for 3 minutes instead of failing immediately with clear error messages.

Changes:
- checkIPv6AddressState() and checkPodIPv6Ready(): Swap check order to detect "dadfailed" before "tentative" (permanent before transient)
- whereabouts-statefulset.go (getPodWhereaboutsIPs):
  * Change Eventually from func() bool to func() error pattern
  * Use StopTrying() for DAD failures to abort Gomega Eventually immediately
  * Return regular error for transient failures (exec/scanner errors) to retry
- sriov-pod-level-bond.go (inspectPodLevelBondedInterfaceConfig):
  * Return error for DAD failures to abort k8s PollUntilContextTimeout immediately
  * Return false, nil for transient failures (scanner errors) to continue polling

Impact: Tests with DAD failures now fail immediately (subsecond) with specific "IPv6 DAD failed" errors instead of timing out after 3 minutes with generic error messages.

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent a90dd96 commit 25a63fa

2 files changed

Lines changed: 51 additions & 28 deletions

File tree

tests/system-tests/rdscore/internal/rdscorecommon/sriov-pod-level-bond.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -667,20 +667,20 @@ func checkIPv6AddressState(output, ipv6Addr string) (bool, error) {
667667

668668
// Look for lines containing the IPv6 address
669669
if strings.Contains(line, ipv6Addr) && strings.Contains(line, "inet6") {
670-
// Check if the address is in tentative state (DAD in progress)
671-
if strings.Contains(line, "tentative") {
672-
klog.V(100).Infof("IPv6 address %s is in tentative state (DAD in progress): %s",
670+
// Check if DAD failed (permanent failure - must check first)
671+
if strings.Contains(line, "dadfailed") {
672+
klog.V(100).Infof("IPv6 address %s DAD failed: %s",
673673
ipv6Addr, strings.TrimSpace(line))
674674

675-
return false, nil
675+
return false, fmt.Errorf("IPv6 DAD failed for address %s", ipv6Addr)
676676
}
677677

678-
// Check if DAD failed
679-
if strings.Contains(line, "dadfailed") {
680-
klog.V(100).Infof("IPv6 address %s DAD failed: %s",
678+
// Check if the address is in tentative state (DAD in progress)
679+
if strings.Contains(line, "tentative") {
680+
klog.V(100).Infof("IPv6 address %s is in tentative state (DAD in progress): %s",
681681
ipv6Addr, strings.TrimSpace(line))
682682

683-
return false, fmt.Errorf("IPv6 DAD failed for address %s", ipv6Addr)
683+
return false, nil
684684
}
685685

686686
// Address found and not tentative - ready to use
@@ -704,7 +704,7 @@ func checkIPv6AddressState(output, ipv6Addr string) (bool, error) {
704704
return false, nil
705705
}
706706

707-
//nolint:funlen
707+
//nolint:funlen,gocognit
708708
func inspectPodLevelBondedInterfaceConfig(podObj *pod.Builder, ipv4Addr, ipv6Addr string) (bool, error) {
709709
klog.V(100).Infof("Verify pod-level bonded interface configuration for pod %q in namespace %q",
710710
podObj.Definition.Name, podObj.Definition.Namespace)
@@ -782,10 +782,19 @@ func inspectPodLevelBondedInterfaceConfig(podObj *pod.Builder, ipv4Addr, ipv6Add
782782
// Check that IPv6 address is not in tentative state (DAD must be complete)
783783
ipv6Ready, err := checkIPv6AddressState(output, ipv6Addr)
784784
if err != nil {
785-
klog.V(100).Infof("IPv6 address %s DAD failed for pod %s in namespace %s: %v",
785+
// Check if this is a permanent DAD failure
786+
if strings.Contains(err.Error(), "IPv6 DAD failed") {
787+
klog.V(100).Infof("IPv6 address %s DAD failed for pod %s in namespace %s: %v",
788+
ipv6Addr, podObj.Definition.Name, podObj.Definition.Namespace, err)
789+
790+
return false, err
791+
}
792+
793+
// For other errors (scanner errors), continue retrying
794+
klog.V(100).Infof("Temporary error checking IPv6 address %s for pod %s in namespace %s: %v, will retry",
786795
ipv6Addr, podObj.Definition.Name, podObj.Definition.Namespace, err)
787796

788-
return false, err
797+
return false, nil
789798
}
790799

791800
if !ipv6Ready {

tests/system-tests/rdscore/internal/rdscorecommon/whereabouts-statefulset.go

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,14 @@ func checkPodIPv6Ready(podObj *pod.Builder, interfaceName, ipv6Addr string) (boo
373373

374374
// Look for lines containing the IPv6 address
375375
if strings.Contains(line, ipv6Addr) && strings.Contains(line, "inet6") {
376+
// Check if DAD failed (permanent failure - must check first)
377+
if strings.Contains(line, "dadfailed") {
378+
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("IPv6 address %s DAD failed for pod %q: %s",
379+
ipv6Addr, podObj.Object.Name, strings.TrimSpace(line))
380+
381+
return false, fmt.Errorf("IPv6 DAD failed for address %s in pod %s", ipv6Addr, podObj.Object.Name)
382+
}
383+
376384
// Check if the address is in tentative state (DAD in progress)
377385
if strings.Contains(line, "tentative") {
378386
klog.V(rdscoreparams.RDSCoreLogLevel).Infof(
@@ -382,14 +390,6 @@ func checkPodIPv6Ready(podObj *pod.Builder, interfaceName, ipv6Addr string) (boo
382390
return false, nil
383391
}
384392

385-
// Check if DAD failed
386-
if strings.Contains(line, "dadfailed") {
387-
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("IPv6 address %s DAD failed for pod %q: %s",
388-
ipv6Addr, podObj.Object.Name, strings.TrimSpace(line))
389-
390-
return false, fmt.Errorf("IPv6 DAD failed for address %s in pod %s", ipv6Addr, podObj.Object.Name)
391-
}
392-
393393
// Address found and not tentative - ready to use
394394
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("IPv6 address %s is ready (not tentative) for pod %q",
395395
ipv6Addr, podObj.Object.Name)
@@ -415,6 +415,8 @@ func checkPodIPv6Ready(podObj *pod.Builder, interfaceName, ipv6Addr string) (boo
415415
}
416416

417417
// getPodWhereaboutsIPs gets the IP addresses for the given pod.
418+
//
419+
//nolint:gocognit
418420
func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[string][]NetworkInterface {
419421
podWhereaboutsIPs := make(map[string][]NetworkInterface)
420422

@@ -425,7 +427,7 @@ func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[s
425427
for _, _pod := range activePods {
426428
var networkInterface []NetworkInterface
427429

428-
Eventually(func() bool {
430+
Eventually(func() error {
429431
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("Executing command %q within pod %q in %q namespace",
430432
cmdGetIPAddr, _pod.Object.Name, _pod.Object.Namespace)
431433

@@ -434,14 +436,14 @@ func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[s
434436
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("Failed to execute command within pod %q in %q namespace: %s",
435437
_pod.Object.Name, _pod.Object.Namespace, err)
436438

437-
return false
439+
return fmt.Errorf("failed to execute command in pod: %w", err)
438440
}
439441

440442
if addrBuffInfo.Len() == 0 {
441443
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("Empty output from command within pod %q in %q namespace",
442444
_pod.Object.Name, _pod.Object.Namespace)
443445

444-
return false
446+
return fmt.Errorf("empty output from command")
445447
}
446448

447449
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("Unmarshalling IP addresses")
@@ -451,7 +453,7 @@ func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[s
451453
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("Failed to unmarshal IP addresses for pod %q in %q namespace: %s",
452454
_pod.Object.Name, _pod.Object.Namespace, err)
453455

454-
return false
456+
return fmt.Errorf("failed to unmarshal IP addresses: %w", err)
455457
}
456458

457459
klog.V(rdscoreparams.RDSCoreLogLevel).Infof("IP addresses: %+v", networkInterface)
@@ -464,19 +466,31 @@ func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[s
464466

465467
ipv6Ready, err := checkPodIPv6Ready(_pod, interfaceName, addr.Local)
466468
if err != nil {
469+
// Check if this is a permanent DAD failure
470+
if strings.Contains(err.Error(), "IPv6 DAD failed") {
471+
klog.V(rdscoreparams.RDSCoreLogLevel).Infof(
472+
"IPv6 address %s DAD failed for pod %q in %q namespace: %v",
473+
addr.Local, _pod.Object.Name, _pod.Object.Namespace, err)
474+
475+
return StopTrying(fmt.Sprintf(
476+
"IPv6 DAD failed for address %s in pod %s/%s",
477+
addr.Local, _pod.Object.Namespace, _pod.Object.Name)).Wrap(err)
478+
}
479+
480+
// For other errors (exec failures, scanner errors), continue retrying
467481
klog.V(rdscoreparams.RDSCoreLogLevel).Infof(
468-
"IPv6 address %s DAD failed for pod %q in %q namespace: %v",
482+
"Temporary error checking IPv6 address %s for pod %q in %q namespace: %v, will retry",
469483
addr.Local, _pod.Object.Name, _pod.Object.Namespace, err)
470484

471-
return false
485+
return fmt.Errorf("failed to check IPv6 address: %w", err)
472486
}
473487

474488
if !ipv6Ready {
475489
klog.V(rdscoreparams.RDSCoreLogLevel).Infof(
476490
"IPv6 address %s not ready yet (tentative state) for pod %q in %q namespace, retrying...",
477491
addr.Local, _pod.Object.Name, _pod.Object.Namespace)
478492

479-
return false
493+
return fmt.Errorf("IPv6 address not ready (tentative state)")
480494
}
481495

482496
klog.V(rdscoreparams.RDSCoreLogLevel).Infof(
@@ -487,8 +501,8 @@ func getPodWhereaboutsIPs(activePods []*pod.Builder, interfaceName string) map[s
487501

488502
podWhereaboutsIPs[_pod.Object.Name] = networkInterface
489503

490-
return true
491-
}).WithContext(ctx).WithPolling(15*time.Second).WithTimeout(3*time.Minute).Should(BeTrue(),
504+
return nil
505+
}).WithContext(ctx).WithPolling(15*time.Second).WithTimeout(3*time.Minute).Should(Succeed(),
492506
"Failed to get IP addresses for pod %q in %q namespace", _pod.Object.Name, _pod.Object.Namespace)
493507
}
494508

0 commit comments

Comments
 (0)