Skip to content

Commit 4640b1c

Browse files
greyerofedcdavid
authored andcommitted
CI: Test for Dual NIC BC with HA for phc2sys
Added a new test case for the DualNICBC in HA mode. The test case implements this sequence: - Turn down the slave interface in the primary BC. - Wait for phc2sys logs that reflect the selection of the secondary BC's slave interface. - Turn up the original slave interface in the primary BC. - Wait for ph2sys logs that reflect the selection of the original slave interface of the primary BC. In order for this test case to work, I've done some modifications in the test framework: - Added a function to create a separate third config for DualNIC Boundary Clocks working in HA mode. This extra config has phc2sys config opts and haProfiles in the ptpSettings field. - Updated the discovery mechanism to detect the DualNICBC HA mode when two BC configs and the extra HA config for ph2sys are found. - Refactored GetPodLogsRegex and created GetPodLogsRegexSince and moved the regex detection to a separate func findRegexInStream().
1 parent 1b2ca4a commit 4640b1c

File tree

7 files changed

+325
-51
lines changed

7 files changed

+325
-51
lines changed

scripts/run-ci-github.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,7 @@ PTP_TEST_MODE=oc ginkgo --skip=".*The interfaces supporting ptp can be discovere
3939
PTP_TEST_MODE=bc ginkgo --skip=".*The interfaces supporting ptp can be discovered correctly.*" --skip="Negative - run pmc in a new unprivileged pod on the slave node.*" -v --keep-going --output-dir=$JUNIT_OUTPUT_DIR --junit-report=$JUNIT_OUTPUT_FILE -v "$SUITE"/serial
4040
# Dual NIC BC
4141
PTP_TEST_MODE=dualnicbc ginkgo --skip=".*The interfaces supporting ptp can be discovered correctly.*" --skip="Negative - run pmc in a new unprivileged pod on the slave node.*" -v --keep-going --output-dir=$JUNIT_OUTPUT_DIR --junit-report=$JUNIT_OUTPUT_FILE -v "$SUITE"/serial
42+
# Dual NIC BC HA
43+
PTP_TEST_MODE=dualnicbcha ginkgo --skip=".*The interfaces supporting ptp can be discovered correctly.*" --skip="Negative - run pmc in a new unprivileged pod on the slave node.*" -v --keep-going --output-dir=$JUNIT_OUTPUT_DIR --junit-report=$JUNIT_OUTPUT_FILE -v "$SUITE"/serial
4244
# Dual port
4345
PTP_TEST_MODE=dualfollower ginkgo --skip=".*The interfaces supporting ptp can be discovered correctly.*" --skip="Negative - run pmc in a new unprivileged pod on the slave node.*" -v --keep-going --output-dir=$JUNIT_OUTPUT_DIR --junit-report=$JUNIT_OUTPUT_FILE -v "$SUITE"/serial

test/conformance/serial/ptp.go

Lines changed: 113 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"os"
99
"path"
1010
"regexp"
11+
"slices"
1112
"sort"
1213
"strconv"
1314
"strings"
@@ -364,7 +365,7 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
364365
}
365366
err = ptptesthelper.BasicClockSyncCheck(fullConfig, (*ptpv1.PtpConfig)(fullConfig.DiscoveredClockUnderTestPtpConfig), grandmasterID, metrics.MetricClockStateLocked, metrics.MetricRoleSlave, true)
366367
Expect(err).To(BeNil())
367-
if fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClock {
368+
if fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClock || fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClockHA {
368369
err = ptptesthelper.BasicClockSyncCheck(fullConfig, (*ptpv1.PtpConfig)(fullConfig.DiscoveredClockUnderTestSecondaryPtpConfig), grandmasterID, metrics.MetricClockStateLocked, metrics.MetricRoleSlave, true)
369370
Expect(err).To(BeNil())
370371
}
@@ -474,9 +475,11 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
474475
}
475476

476477
if fullConfig.PtpModeDiscovered != testconfig.BoundaryClock &&
477-
fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClock {
478+
fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClock &&
479+
fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClockHA {
478480
Skip("test only valid for Boundary clock in multi-node clusters")
479481
}
482+
480483
if !fullConfig.FoundSolutions[testconfig.AlgoBCWithSlavesString] &&
481484
!fullConfig.FoundSolutions[testconfig.AlgoDualNicBCWithSlavesString] &&
482485
!fullConfig.FoundSolutions[testconfig.AlgoBCWithSlavesExtGMString] &&
@@ -489,8 +492,8 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
489492
err = ptptesthelper.BasicClockSyncCheck(fullConfig, (*ptpv1.PtpConfig)(fullConfig.DiscoveredSlave1PtpConfig), &masterIDBc1, metrics.MetricClockStateLocked, metrics.MetricRoleSlave, true)
490493
Expect(err).To(BeNil())
491494

492-
if (fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClock) && (fullConfig.FoundSolutions[testconfig.AlgoDualNicBCWithSlavesExtGMString] ||
493-
fullConfig.FoundSolutions[testconfig.AlgoDualNicBCWithSlavesString]) {
495+
if (fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClock || fullConfig.PtpModeDiscovered == testconfig.DualNICBoundaryClockHA) &&
496+
(fullConfig.FoundSolutions[testconfig.AlgoDualNicBCWithSlavesExtGMString] || fullConfig.FoundSolutions[testconfig.AlgoDualNicBCWithSlavesString]) {
494497
aLabel := pkg.PtpClockUnderTestNodeLabel
495498
masterIDBc2, err := ptphelper.GetClockIDMaster(pkg.PtpBcMaster2PolicyName, &aLabel, nil, false)
496499
Expect(err).To(BeNil())
@@ -522,7 +525,7 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
522525
policyName = pkg.PtpSlave1PolicyName
523526
case testconfig.BoundaryClock:
524527
policyName = pkg.PtpBcMaster1PolicyName
525-
case testconfig.DualNICBoundaryClock:
528+
case testconfig.DualNICBoundaryClock, testconfig.DualNICBoundaryClockHA:
526529
policyName = pkg.PtpBcMaster1PolicyName
527530
}
528531
ptpConfigToModify, err := client.Client.PtpV1Interface.PtpConfigs(pkg.PtpLinuxDaemonNamespace).Get(context.Background(), policyName, metav1.GetOptions{})
@@ -575,6 +578,111 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
575578
}
576579
})
577580
})
581+
582+
It("DualNICBCHA phc2sys switches to secondary ptp4l when primary interface fails", func() {
583+
if fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClockHA {
584+
Skip("Test only valid for Dual NIC Boundary Clocks with phc2sys HA configuration (DualNICBCHA)")
585+
}
586+
587+
By("Identifying which interface phc2sys is currently using")
588+
primaryPtpConfig := (*ptpv1.PtpConfig)(fullConfig.DiscoveredClockUnderTestPtpConfig)
589+
primaryBCSlaveInterfaces := ptpv1.GetInterfaces(*primaryPtpConfig, ptpv1.Slave)
590+
591+
secondaryPtpConfig := (*ptpv1.PtpConfig)(fullConfig.DiscoveredClockUnderTestSecondaryPtpConfig)
592+
secondaryBCSlaveInterfaces := ptpv1.GetInterfaces(*secondaryPtpConfig, ptpv1.Slave)
593+
594+
logrus.Infof("Primary BC slave interfaces: %v", primaryBCSlaveInterfaces)
595+
logrus.Infof("Secondary BC slave interfaces: %v", secondaryBCSlaveInterfaces)
596+
597+
// Get phc2sys logs to identify which interface it's using
598+
const phc2sysLogPattern = `phc2sys(?m).*?:.* selecting (\w+) as out-of-domain source clock`
599+
var selectedInterface string
600+
601+
logMatches, err := pods.GetPodLogsRegex(fullConfig.DiscoveredClockUnderTestPod.Namespace,
602+
fullConfig.DiscoveredClockUnderTestPod.Name, pkg.PtpContainerName,
603+
phc2sysLogPattern, false, pkg.TimeoutIn1Minute)
604+
Expect(err).NotTo(HaveOccurred())
605+
Expect(len(logMatches)).To(BeNumerically("==", 1), "Could not identify which interface phc2sys is using")
606+
607+
logrus.Infof("phc2sys log matching line: %v", logMatches[0][0])
608+
selectedInterface = logMatches[0][1]
609+
610+
// Save it as primary interface
611+
primaryInterface := selectedInterface
612+
By("Verifying the selected interface " + selectedInterface + " is a primary BC's slave interface")
613+
// Check if the selected interface belongs to the primary boundary clock config
614+
615+
// Check if the selected interface belongs to the primary boundary clock config
616+
if !slices.Contains(primaryBCSlaveInterfaces, selectedInterface) {
617+
Fail(fmt.Sprintf("Selected interface %s does not belong to the primary boundary clock config. Primary interfaces: %v", selectedInterface, primaryBCSlaveInterfaces))
618+
}
619+
620+
// Wait for some time to ensure the regex won't match the previous log entry
621+
time.Sleep(2 * time.Second)
622+
ifDownTime := time.Now()
623+
By("Taking down the selected interface " + selectedInterface)
624+
625+
// Use the PortEngine to turn down the interface so phc2sys will switch to the slave interface in the secondary boundary clock
626+
portEngine.TurnPortDown(selectedInterface)
627+
628+
// Wait for the interface to be down and ptp4l to report freerun
629+
Eventually(func() error {
630+
return metrics.CheckClockRole([]metrics.MetricRole{metrics.MetricRoleFaulty}, []string{selectedInterface}, &fullConfig.DiscoveredClockUnderTestPod.Spec.NodeName)
631+
}, 30*time.Second, 5*time.Second).Should(BeNil(), "Primary BC's slave interface "+selectedInterface+" should be in FAULTY state")
632+
633+
By("Waiting for 5 seconds for the new interface to be selected")
634+
time.Sleep(5 * time.Second)
635+
636+
By("Verifying phc2sys switches to a different interface")
637+
// Wait for phc2sys to switch to a different interface
638+
var newSelectedInterface string
639+
logMatches, err = pods.GetPodLogsRegexSince(fullConfig.DiscoveredClockUnderTestPod.Namespace,
640+
fullConfig.DiscoveredClockUnderTestPod.Name, pkg.PtpContainerName,
641+
phc2sysLogPattern, false, pkg.TimeoutIn1Minute, ifDownTime)
642+
Expect(err).NotTo(HaveOccurred())
643+
Expect(len(logMatches)).To(BeNumerically("==", 1), "Could not identify which interface phc2sys is using")
644+
// Get the most recent log entry
645+
logrus.Infof("phc2sys log matching line: %v", logMatches[0][0])
646+
newSelectedInterface = logMatches[0][1]
647+
648+
// Verify that phc2sys switched to a different interface
649+
Expect(newSelectedInterface).ToNot(Equal(selectedInterface), "phc2sys should have switched to a different interface")
650+
651+
By("Verifying the new selected interface " + newSelectedInterface + " is a secondary BC's slave interface")
652+
// Check if the selected interface belongs to the primary boundary clock config
653+
if !slices.Contains(secondaryBCSlaveInterfaces, newSelectedInterface) {
654+
Fail(fmt.Sprintf("Selected interface %s does not belong to the secondary boundary clock config. Secondary interfaces: %v", newSelectedInterface, secondaryBCSlaveInterfaces))
655+
}
656+
657+
time.Sleep(2 * time.Second)
658+
ifUpTime := time.Now()
659+
By("Restoring the primary BC's slave interface " + primaryInterface)
660+
// Bring the interface back up
661+
portEngine.TurnPortUp(primaryInterface)
662+
663+
// Wait for the interface to recover to SLAVE state
664+
Eventually(func() error {
665+
return metrics.CheckClockRole([]metrics.MetricRole{metrics.MetricRoleSlave}, []string{primaryInterface}, &fullConfig.DiscoveredClockUnderTestPod.Spec.NodeName)
666+
}, 30*time.Second, 5*time.Second).Should(BeNil(), "Primary BC's slave interface "+primaryInterface+" should recover to SLAVE state")
667+
668+
By("Waiting 5 seconds for the primary BC's slave interface to be selected again")
669+
time.Sleep(5 * time.Second)
670+
671+
// Check the new interfaces is the primary one
672+
logMatches, err = pods.GetPodLogsRegexSince(fullConfig.DiscoveredClockUnderTestPod.Namespace,
673+
fullConfig.DiscoveredClockUnderTestPod.Name, pkg.PtpContainerName,
674+
phc2sysLogPattern, false, pkg.TimeoutIn1Minute, ifUpTime)
675+
Expect(err).NotTo(HaveOccurred())
676+
Expect(len(logMatches)).To(BeNumerically("==", 1), "Could not identify which interface phc2sys is using")
677+
// Get the most recent log entry
678+
logrus.Infof("phc2sys log matching line: %v", logMatches[0][0])
679+
selectedInterface = logMatches[0][1]
680+
681+
By("Verifying the selected interface " + selectedInterface + " is the original primary BC's slave interface " + primaryInterface)
682+
if selectedInterface != primaryInterface {
683+
Fail(fmt.Sprintf("Selected interface %s is not the original primary interface %s", selectedInterface, primaryInterface))
684+
}
685+
})
578686
})
579687

580688
Context("PTP metric is present", func() {

test/pkg/clean/clean.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ func Configs() {
6363
ptpConfig.Name == pkg.PtpSlave1PolicyName ||
6464
ptpConfig.Name == pkg.PtpBcMaster2PolicyName ||
6565
ptpConfig.Name == pkg.PtpSlave2PolicyName ||
66-
ptpConfig.Name == pkg.PtpTempPolicyName {
66+
ptpConfig.Name == pkg.PtpTempPolicyName ||
67+
ptpConfig.Name == pkg.PtpDualNicBCHAPolicyName {
6768
err = client.Client.PtpConfigs(pkg.PtpLinuxDaemonNamespace).Delete(context.Background(), ptpConfig.Name, metav1.DeleteOptions{})
6869
if err != nil {
6970
logrus.Infof("clean.All: Failed to delete ptp config %s %v", ptpConfig.Name, err)

test/pkg/consts.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ const (
3131
PtpSlave1PolicyName = "test-slave1"
3232
PtpBcMaster2PolicyName = "test-bc-master2"
3333
PtpSlave2PolicyName = "test-slave2"
34+
PtpDualNicBCHAPolicyName = "test-dual-nic-bc-ha"
3435
PtpTempPolicyName = "temp"
3536

3637
// node labels

test/pkg/pods/pods.go

Lines changed: 75 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package pods
33
import (
44
"bytes"
55
"context"
6+
"errors"
67
"fmt"
78
"io"
89
"regexp"
@@ -137,6 +138,71 @@ func WaitForPhase(cs *testclient.ClientSet, pod *corev1.Pod, phaseType corev1.Po
137138
})
138139
}
139140

141+
func findRegexInStream(stream io.ReadCloser, r *regexp.Regexp, timeout time.Duration) (matches [][]string, err error) {
142+
logContent := ""
143+
buf := make([]byte, 2000)
144+
145+
for start := time.Now(); time.Since(start) <= timeout && len(matches) == 0; {
146+
numBytes, err := stream.Read(buf)
147+
if err != nil {
148+
if err == io.EOF {
149+
logContent += string(buf[:numBytes])
150+
matches = r.FindAllStringSubmatch(logContent, -1)
151+
break
152+
} else {
153+
return nil, fmt.Errorf("error reading from stream: %s", err)
154+
}
155+
}
156+
157+
if numBytes == 0 {
158+
time.Sleep(100 * time.Millisecond)
159+
continue
160+
}
161+
162+
logContent += string(buf[:numBytes])
163+
matches = r.FindAllStringSubmatch(logContent, -1)
164+
}
165+
166+
if len(matches) == 0 {
167+
return matches, errors.New("timedout waiting for matches")
168+
}
169+
170+
return matches, nil
171+
}
172+
173+
// returns last Regex match in the logs for a given pod
174+
func GetPodLogsRegexSince(namespace string, podName string, containerName, regex string, isLiteralText bool, timeout time.Duration, since time.Time) (matches [][]string, err error) {
175+
const matchOnlyFullLines = `\s*^`
176+
if isLiteralText {
177+
regex = regexp.QuoteMeta(regex)
178+
} else {
179+
regex += matchOnlyFullLines
180+
}
181+
182+
r := regexp.MustCompile(regex)
183+
184+
podLogOptions := corev1.PodLogOptions{
185+
Container: containerName,
186+
Follow: true,
187+
SinceTime: &metav1.Time{Time: since},
188+
}
189+
190+
podLogRequest := testclient.Client.CoreV1().Pods(namespace).GetLogs(podName, &podLogOptions)
191+
192+
stream, err := podLogRequest.Stream(context.TODO())
193+
if err != nil {
194+
return matches, fmt.Errorf("failed to open log streamn for %s/%s container=%s, err=%s", namespace, podName, containerName, err)
195+
}
196+
defer stream.Close()
197+
198+
matches, err = findRegexInStream(stream, r, timeout)
199+
if err != nil {
200+
return matches, fmt.Errorf("could not find regex in log stream for %s/%s container=%s, err=%s", namespace, podName, containerName, err)
201+
}
202+
203+
return matches, nil
204+
}
205+
140206
// returns last Regex match in the logs for a given pod
141207
func GetPodLogsRegex(namespace string, podName string, containerName, regex string, isLiteralText bool, timeout time.Duration) (matches [][]string, err error) {
142208
const matchOnlyFullLines = `\s*^`
@@ -145,46 +211,27 @@ func GetPodLogsRegex(namespace string, podName string, containerName, regex stri
145211
} else {
146212
regex += matchOnlyFullLines
147213
}
148-
//count := int64(100)
214+
215+
r := regexp.MustCompile(regex)
216+
149217
podLogOptions := corev1.PodLogOptions{
150218
Container: containerName,
151219
Follow: true,
152-
//TailLines: &count,
153220
}
154221

155222
podLogRequest := testclient.Client.CoreV1().Pods(namespace).GetLogs(podName, &podLogOptions)
223+
156224
stream, err := podLogRequest.Stream(context.TODO())
157225
if err != nil {
158-
return matches, fmt.Errorf("could not retrieve log in ns=%s pod=%s, err=%s", namespace, podName, err)
226+
return matches, fmt.Errorf("failed to open log streamn for %s/%s container=%s, err=%s", namespace, podName, containerName, err)
159227
}
160228
defer stream.Close()
161-
start := time.Now()
162-
message := ""
163-
for {
164-
t := time.Now()
165-
elapsed := t.Sub(start)
166-
if elapsed > timeout {
167-
return matches, fmt.Errorf("timedout waiting for log in ns=%s pod=%s, looking for = %s", namespace, podName, regex)
168-
}
169-
buf := make([]byte, 2000)
170-
numBytes, err := stream.Read(buf)
171-
if numBytes == 0 {
172-
continue
173-
}
174-
if err == io.EOF {
175-
break
176-
}
177-
if err != nil {
178-
return matches, fmt.Errorf("error getting log stream in ns=%s pod=%s, err=%s", namespace, podName, err)
179-
}
180-
message += string(buf[:numBytes])
181-
r := regexp.MustCompile(regex)
182-
matches = r.FindAllStringSubmatch(message, -1)
183-
if len(matches) > 0 {
184-
return matches, nil
185-
}
186229

230+
matches, err = findRegexInStream(stream, r, timeout)
231+
if err != nil {
232+
return matches, fmt.Errorf("could not find regex in log stream for %s/%s container=%s, err=%s", namespace, podName, containerName, err)
187233
}
234+
188235
return matches, nil
189236
}
190237

test/pkg/ptphelper/ptphelper.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,24 @@ func IsSecondaryBc(config *ptpv1.PtpConfig) bool {
651651
return true
652652
}
653653

654+
// Checks if the ptpSettings has more than one HA profile
655+
func hasHaProfiles(ptpSettings map[string]string) bool {
656+
logrus.Infof("Checking if ptpSettings %v has more than one HA profile", ptpSettings)
657+
return ptpSettings != nil && ptpSettings["haProfiles"] != "" && len(strings.Split(ptpSettings["haProfiles"], ",")) > 1
658+
}
659+
660+
// Checks for DualNIC BC HA
661+
func ConfigIsPhc2SysHa(config *ptpv1.PtpConfig) bool {
662+
logrus.Infof("Checking if config %s is Phc2Sys HA", config.Name)
663+
for _, profile := range config.Spec.Profile {
664+
if profile.Phc2sysOpts != nil && profile.Ptp4lOpts != nil && *profile.Ptp4lOpts == "" && hasHaProfiles(profile.PtpSettings) {
665+
logrus.Infof("Config %s is Phc2Sys HA", config.Name)
666+
return true
667+
}
668+
}
669+
return false
670+
}
671+
654672
// Checks for OC
655673
func IsPtpSlave(ptp4lOpts, phc2sysOpts *string) bool {
656674
return /*strings.Contains(*ptp4lOpts, "-s") &&*/ ((phc2sysOpts != nil && (strings.Count(*phc2sysOpts, "-a") == 1 && strings.Count(*phc2sysOpts, "-r") == 1)) ||

0 commit comments

Comments
 (0)