diff --git a/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go b/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go index fcb5d74c0..0351655be 100644 --- a/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go +++ b/tests/cnf/ran/ptp/tests/ptp-oc-2-port.go @@ -77,13 +77,6 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt By("Restoring OC 2-port interfaces") restoreOc2PortAndValidate(context.TODO(), prometheusAPI, nodeName, oc2PortInfo.Interfaces) }) - By("getting event consumer pod for the node") - - eventPod, err := consumer.GetConsumerPodforNode(RANConfig.Spoke1APIClient, nodeName) - Expect(err).ToNot(HaveOccurred(), "Failed to get event consumer pod for node %s", nodeName) - - startTime := time.Now() - By("bringing down the active interface to cause a failover") err = iface.SetInterfaceStatus( @@ -91,31 +84,7 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt Expect(err).ToNot(HaveOccurred(), "Failed to set interface %s to down on node %s", oc2PortInfo.ActiveInterface, nodeName) - By("validating PTP clock class metric remains 6 after failover") - - clockClassQuery := metrics.ClockClassQuery{ - Node: metrics.Equals(nodeName), - Process: metrics.Equals(metrics.ProcessPTP4L), - } - err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockClassQuery, metrics.ClockClass6, - metrics.AssertWithStableDuration(10*time.Second), - metrics.AssertWithTimeout(45*time.Second)) - Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP clock class metric remains 6 after failover") - - By("validating PTP clock state metric remains LOCKED after failover") - - clockStateQuery := metrics.ClockStateQuery{ - Node: metrics.Equals(nodeName), - Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), - } - err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockStateQuery, metrics.ClockStateLocked, - metrics.AssertWithStableDuration(10*time.Second), - metrics.AssertWithTimeout(45*time.Second)) - Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP process metric stays in LOCKED state after failover") - - By("validating PTP initial active interface role metric change to FAULTY after failover") + By("validating active interface transitions to FAULTY after failover") interfaceRoleQuery := metrics.InterfaceRoleQuery{ Interface: metrics.Equals(oc2PortInfo.ActiveInterface), @@ -125,9 +94,10 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt err = metrics.AssertQuery(context.TODO(), prometheusAPI, interfaceRoleQuery, metrics.InterfaceRoleFaulty, metrics.AssertWithTimeout(45*time.Second)) Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP active interface role metric changed to FAULTY after failover") + "Role swap failed: active interface %s did not become FAULTY within %s", + oc2PortInfo.ActiveInterface, 45*time.Second) - By("validating PTP passive interface role metric changed to SLAVE after failover") + By("validating passive interface transitions to FOLLOWER after failover") interfaceRoleQuery = metrics.InterfaceRoleQuery{ Interface: metrics.Equals(oc2PortInfo.PassiveInterface), @@ -137,17 +107,33 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt err = metrics.AssertQuery(context.TODO(), prometheusAPI, interfaceRoleQuery, metrics.InterfaceRoleFollower, metrics.AssertWithTimeout(45*time.Second)) Expect(err).ToNot(HaveOccurred(), - "Failed to assert that the PTP passive interface role metric changed to SLAVE after failover") + "Role swap failed: passive interface %s did not become FOLLOWER within %s", + oc2PortInfo.PassiveInterface, 45*time.Second) - By("validating no FREERUN event is generated after failover") + By("validating PTP processes relock after failover") - freerunFilter := events.All( - events.IsType(eventptp.PtpStateChange), - events.HasValue(events.WithSyncState(eventptp.FREERUN), events.OnInterface(oc2PortInfo.IfaceGroup)), - ) - err = events.WaitForEvent(eventPod, startTime, 1*time.Minute, freerunFilter) - Expect(err).To(HaveOccurred(), - "Unexpected FREERUN event detected for interface %s", oc2PortInfo.ActiveInterface) + clockStateQuery := metrics.ClockStateQuery{ + Node: metrics.Equals(nodeName), + Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), + } + err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockStateQuery, metrics.ClockStateLocked, + metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithTimeout(90*time.Second)) + Expect(err).ToNot(HaveOccurred(), + "Relock failed: ptp4l and phc2sys did not return to LOCKED within %s", + 90*time.Second) + + By("validating PTP clock class returns to 6 after failover convergence") + + clockClassQuery := metrics.ClockClassQuery{ + Node: metrics.Equals(nodeName), + Process: metrics.Equals(metrics.ProcessPTP4L), + } + err = metrics.AssertQuery(context.TODO(), prometheusAPI, clockClassQuery, metrics.ClockClass6, + metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithTimeout(90*time.Second)) + Expect(err).ToNot(HaveOccurred(), + "Relock failed: clock class did not return to 6 within %s", 90*time.Second) By("restoring OC 2-port interfaces before test completion") restoreOc2PortAndValidate(context.TODO(), prometheusAPI, nodeName, oc2PortInfo.Interfaces) @@ -430,20 +416,20 @@ func restoreOc2PortAndValidate( oc2PortInterface.Name, nodeName) } - By("validating OC 2-port clock state returns to LOCKED") + By("validating OC 2-port active/passive roles stabilize after restoration") + + waitForOc2PortActivePassive(ctx, prometheusAPI, nodeName, oc2PortInterfaces, time.Minute) + + By("validating OC 2-port clock state returns to LOCKED after restoration") clockStateQuery := metrics.ClockStateQuery{ Node: metrics.Equals(nodeName), Process: metrics.Includes(metrics.ProcessPTP4L, metrics.ProcessPHC2SYS), } err := metrics.AssertQuery(ctx, prometheusAPI, clockStateQuery, metrics.ClockStateLocked, - metrics.AssertWithStableDuration(10*time.Second), + metrics.AssertWithStableDuration(5*time.Second), metrics.AssertWithTimeout(3*time.Minute)) - Expect(err).ToNot(HaveOccurred(), "Failed to assert clock state is LOCKED after restoration") - - By("validating OC 2-port active/passive roles after restoration") - - waitForOc2PortActivePassive(ctx, prometheusAPI, nodeName, oc2PortInterfaces, 30*time.Second) + Expect(err).ToNot(HaveOccurred(), "Restore failed: clock state did not return to LOCKED after restoration") } // waitForOc2PortActivePassive waits for OC 2-port roles to stabilize.