From 46b540dfade99ea7d979f77ec09cfa471989142b Mon Sep 17 00:00:00 2001 From: Kirsten Laskoski Date: Thu, 26 Mar 2026 10:40:42 -0400 Subject: [PATCH] cnf ran ptp: refactor OC 2 port 80963 This commit refactors the OC 2 port test 80963 to avoid a consistent failure we see in CI. The failover process is not instant, and may bounce into FREERUN briefly during the sync on the passive port. These are an artifact of how the feature works and not product bugs, so this commit updates the test to be less strict about these. Assisted-by: Cursor --- tests/cnf/ran/ptp/tests/ptp-oc-2-port.go | 86 ++++++++++-------------- 1 file changed, 36 insertions(+), 50 deletions(-) diff --git a/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go b/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go index fcb5d74c0..0351655be 100644 --- a/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go +++ b/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go @@ -77,13 +77,6 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt By("Restoring OC 2-port interfaces") restoreOc2PortAndValidate(context.TODO(), prometheusAPI, nodeName, oc2PortInfo.Interfaces) }) - By("getting event consumer pod for the node") - - eventPod, err := consumer.GetConsumerPodforNode(RANConfig.Spoke1APIClient, nodeName) - Expect(err).ToNot(HaveOccurred(), "Failed to get event consumer pod for node %s", nodeName) - - startTime := time.Now() - By("bringing down the active interface to cause a failover") err = iface.SetInterfaceStatus( @@ -91,31 +84,7 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt Expect(err).ToNot(HaveOccurred(), "Failed to set interface %s to down on node %s", oc2PortInfo.ActiveInterface, nodeName) - By("validating PTP clock class metric remains 6 after failover") - - clockClassQuery := metrics.ClockClassQuery{ - Node: metrics.Equals(nodeName), - Process: metrics.Equals(metrics.ProcessPTP4L), - } - err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockClassQuery, metrics.ClockClass6, - metrics.AssertWithStableDuration(10*time.Second), - metrics.AssertWithTimeout(45*time.Second)) - Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP clock class metric remains 6 after failover") - - By("validating PTP clock state metric remains LOCKED after failover") - - clockStateQuery := metrics.ClockStateQuery{ - Node: metrics.Equals(nodeName), - Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), - } - err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockStateQuery, metrics.ClockStateLocked, - metrics.AssertWithStableDuration(10*time.Second), - metrics.AssertWithTimeout(45*time.Second)) - Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP process metric stays in LOCKED state after failover") - - By("validating PTP initial active interface role metric change to FAULTY after failover") + By("validating active interface transitions to FAULTY after failover") interfaceRoleQuery := metrics.InterfaceRoleQuery{ Interface: metrics.Equals(oc2PortInfo.ActiveInterface), @@ -125,9 +94,10 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt err = metrics.AssertQuery(context.TODO(), prometheusAPI, interfaceRoleQuery, metrics.InterfaceRoleFaulty, metrics.AssertWithTimeout(45*time.Second)) Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP active interface role metric changed to FAULTY after failover") + "Role swap failed: active interface %s did not become FAULTY within %s", + oc2PortInfo.ActiveInterface, 45*time.Second) - By("validating PTP passive interface role metric changed to SLAVE after failover") + By("validating passive interface transitions to FOLLOWER after failover") interfaceRoleQuery = metrics.InterfaceRoleQuery{ Interface: metrics.Equals(oc2PortInfo.PassiveInterface), @@ -137,17 +107,33 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt err = metrics.AssertQuery(context.TODO(), prometheusAPI, interfaceRoleQuery, metrics.InterfaceRoleFollower, metrics.AssertWithTimeout(45*time.Second)) Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP passive interface role metric changed to SLAVE after failover") + "Role swap failed: passive interface %s did not become FOLLOWER within %s", + oc2PortInfo.PassiveInterface, 45*time.Second) - By("validating no FREERUN event is generated after failover") + By("validating PTP processes relock after failover") - freerunFilter := events.All( - events.IsType(eventptp.PtpStateChange), - events.HasValue(events.WithSyncState(eventptp.FREERUN), events.OnInterface(oc2PortInfo.IfaceGroup)), - ) - err = events.WaitForEvent(eventPod, startTime, 1*time.Minute, freerunFilter) - Expect(err).To(HaveOccurred(), - "Unexpected FREERUN event detected for interface %s", oc2PortInfo.ActiveInterface) + clockStateQuery := metrics.ClockStateQuery{ + Node: metrics.Equals(nodeName), + Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), + } + err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockStateQuery, metrics.ClockStateLocked, + metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithTimeout(90*time.Second)) + Expect(err).ToNot(HaveOccurred(), + "Relock failed: ptp4l and phc2sys did not return to LOCKED within %s", + 90*time.Second) + + By("validating PTP clock class returns to 6 after failover convergence") + + clockClassQuery := metrics.ClockClassQuery{ + Node: metrics.Equals(nodeName), + Process: metrics.Equals(metrics.ProcessPTP4L), + } + err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockClassQuery, metrics.ClockClass6, + metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithTimeout(90*time.Second)) + Expect(err).ToNot(HaveOccurred(), + "Relock failed: clock class did not return to 6 within %s", 90*time.Second) By("restoring OC 2-port interfaces before test completion") restoreOc2PortAndValidate(context.TODO(), prometheusAPI, nodeName, oc2PortInfo.Interfaces) @@ -430,20 +416,20 @@ func restoreOc2PortAndValidate( oc2PortInterface.Name, nodeName) } - By("validating OC 2-port clock state returns to LOCKED") + By("validating OC 2-port active/passive roles stabilize after restoration") + + waitForOc2PortActivePassive(ctx, prometheusAPI, nodeName, oc2PortInterfaces, time.Minute) + + By("validating OC 2-port clock state returns to LOCKED after restoration") clockStateQuery := metrics.ClockStateQuery{ Node: metrics.Equals(nodeName), Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), } err := metrics.AssertQuery(ctx, prometheusAPI, clockStateQuery, metrics.ClockStateLocked, - metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithStableDuration(5*time.Second), metrics.AssertWithTimeout(3*time.Minute)) - Expect(err).ToNot(HaveOccurred(), "Failed to assert clock state is LOCKED after restoration") - - By("validating OC 2-port active/passive roles after restoration") - - waitForOc2PortActivePassive(ctx, prometheusAPI, nodeName, oc2PortInterfaces, 30*time.Second) + Expect(err).ToNot(HaveOccurred(), "Restore failed: clock state did not return to LOCKED after restoration") } // waitForOc2PortActivePassive waits for OC 2-port roles to stabilize.