@@ -77,45 +77,14 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt
7777 By ("Restoring OC 2-port interfaces" )
7878 restoreOc2PortAndValidate (context .TODO (), prometheusAPI , nodeName , oc2PortInfo .Interfaces )
7979 })
80- By ("getting event consumer pod for the node" )
81-
82- eventPod , err := consumer .GetConsumerPodforNode (RANConfig .Spoke1APIClient , nodeName )
83- Expect (err ).ToNot (HaveOccurred (), "Failed to get event consumer pod for node %s" , nodeName )
84-
85- startTime := time .Now ()
86-
8780 By ("bringing down the active interface to cause a failover" )
8881
8982 err = iface .SetInterfaceStatus (
9083 RANConfig .Spoke1APIClient , nodeName , oc2PortInfo .ActiveInterface , iface .InterfaceStateDown )
9184 Expect (err ).ToNot (HaveOccurred (),
9285 "Failed to set interface %s to down on node %s" , oc2PortInfo .ActiveInterface , nodeName )
9386
94- By ("validating PTP clock class metric remains 6 after failover" )
95-
96- clockClassQuery := metrics.ClockClassQuery {
97- Node : metrics .Equals (nodeName ),
98- Process : metrics .Equals (metrics .ProcessPTP4L ),
99- }
100- err = metrics .AssertQuery (context .TODO (), prometheusAPI , clockClassQuery , metrics .ClockClass6 ,
101- metrics .AssertWithStableDuration (10 * time .Second ),
102- metrics .AssertWithTimeout (45 * time .Second ))
103- Expect (err ).ToNot (HaveOccurred (),
104- "Failed to assert that the PTP clock class metric remains 6 after failover" )
105-
106- By ("validating PTP clock state metric remains LOCKED after failover" )
107-
108- clockStateQuery := metrics.ClockStateQuery {
109- Node : metrics .Equals (nodeName ),
110- Process : metrics .Includes (metrics .ProcessPTP4L , metrics .ProcessPHC2SYS ),
111- }
112- err = metrics .AssertQuery (context .TODO (), prometheusAPI , clockStateQuery , metrics .ClockStateLocked ,
113- metrics .AssertWithStableDuration (10 * time .Second ),
114- metrics .AssertWithTimeout (45 * time .Second ))
115- Expect (err ).ToNot (HaveOccurred (),
116- "Failed to assert that the PTP process metric stays in LOCKED state after failover" )
117-
118- By ("validating PTP initial active interface role metric change to FAULTY after failover" )
87+ By ("validating active interface transitions to FAULTY after failover" )
11988
12089 interfaceRoleQuery := metrics.InterfaceRoleQuery {
12190 Interface : metrics .Equals (oc2PortInfo .ActiveInterface ),
@@ -125,9 +94,10 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt
12594 err = metrics .AssertQuery (context .TODO (), prometheusAPI , interfaceRoleQuery , metrics .InterfaceRoleFaulty ,
12695 metrics .AssertWithTimeout (45 * time .Second ))
12796 Expect (err ).ToNot (HaveOccurred (),
128- "Failed to assert that the PTP active interface role metric changed to FAULTY after failover" )
97+ "Role swap failed: active interface %s did not become FAULTY within %s" ,
98+ oc2PortInfo .ActiveInterface , 45 * time .Second )
12999
130- By ("validating PTP passive interface role metric changed to SLAVE after failover" )
100+ By ("validating passive interface transitions to FOLLOWER after failover" )
131101
132102 interfaceRoleQuery = metrics.InterfaceRoleQuery {
133103 Interface : metrics .Equals (oc2PortInfo .PassiveInterface ),
@@ -137,17 +107,33 @@ var _ = Describe("PTP OC 2-port", Label(tsparams.LabelOC2Port, tsparams.LabelInt
137107 err = metrics .AssertQuery (context .TODO (), prometheusAPI , interfaceRoleQuery , metrics .InterfaceRoleFollower ,
138108 metrics .AssertWithTimeout (45 * time .Second ))
139109 Expect (err ).ToNot (HaveOccurred (),
140- "Failed to assert that the PTP passive interface role metric changed to SLAVE after failover" )
110+ "Role swap failed: passive interface %s did not become FOLLOWER within %s" ,
111+ oc2PortInfo .PassiveInterface , 45 * time .Second )
141112
142- By ("validating no FREERUN event is generated after failover" )
113+ By ("validating PTP processes relock after failover" )
143114
144- freerunFilter := events .All (
145- events .IsType (eventptp .PtpStateChange ),
146- events .HasValue (events .WithSyncState (eventptp .FREERUN ), events .OnInterface (oc2PortInfo .IfaceGroup )),
147- )
148- err = events .WaitForEvent (eventPod , startTime , 1 * time .Minute , freerunFilter )
149- Expect (err ).To (HaveOccurred (),
150- "Unexpected FREERUN event detected for interface %s" , oc2PortInfo .ActiveInterface )
115+ clockStateQuery := metrics.ClockStateQuery {
116+ Node : metrics .Equals (nodeName ),
117+ Process : metrics .Includes (metrics .ProcessPTP4L , metrics .ProcessPHC2SYS ),
118+ }
119+ err = metrics .AssertQuery (context .TODO (), prometheusAPI , clockStateQuery , metrics .ClockStateLocked ,
120+ metrics .AssertWithStableDuration (10 * time .Second ),
121+ metrics .AssertWithTimeout (90 * time .Second ))
122+ Expect (err ).ToNot (HaveOccurred (),
123+ "Relock failed: ptp4l and phc2sys did not return to LOCKED within %s" ,
124+ 90 * time .Second )
125+
126+ By ("validating PTP clock class returns to 6 after failover convergence" )
127+
128+ clockClassQuery := metrics.ClockClassQuery {
129+ Node : metrics .Equals (nodeName ),
130+ Process : metrics .Equals (metrics .ProcessPTP4L ),
131+ }
132+ err = metrics .AssertQuery (context .TODO (), prometheusAPI , clockClassQuery , metrics .ClockClass6 ,
133+ metrics .AssertWithStableDuration (10 * time .Second ),
134+ metrics .AssertWithTimeout (90 * time .Second ))
135+ Expect (err ).ToNot (HaveOccurred (),
136+ "Relock failed: clock class did not return to 6 within %s" , 90 * time .Second )
151137
152138 By ("restoring OC 2-port interfaces before test completion" )
153139 restoreOc2PortAndValidate (context .TODO (), prometheusAPI , nodeName , oc2PortInfo .Interfaces )
@@ -430,20 +416,20 @@ func restoreOc2PortAndValidate(
430416 oc2PortInterface .Name , nodeName )
431417 }
432418
433- By ("validating OC 2-port clock state returns to LOCKED" )
419+ By ("validating OC 2-port active/passive roles stabilize after restoration" )
420+
421+ waitForOc2PortActivePassive (ctx , prometheusAPI , nodeName , oc2PortInterfaces , 30 * time .Second )
422+
423+ By ("validating OC 2-port clock state returns to LOCKED after restoration" )
434424
435425 clockStateQuery := metrics.ClockStateQuery {
436426 Node : metrics .Equals (nodeName ),
437427 Process : metrics .Includes (metrics .ProcessPTP4L , metrics .ProcessPHC2SYS ),
438428 }
439429 err := metrics .AssertQuery (ctx , prometheusAPI , clockStateQuery , metrics .ClockStateLocked ,
440- metrics .AssertWithStableDuration (10 * time .Second ),
430+ metrics .AssertWithStableDuration (5 * time .Second ),
441431 metrics .AssertWithTimeout (3 * time .Minute ))
442- Expect (err ).ToNot (HaveOccurred (), "Failed to assert clock state is LOCKED after restoration" )
443-
444- By ("validating OC 2-port active/passive roles after restoration" )
445-
446- waitForOc2PortActivePassive (ctx , prometheusAPI , nodeName , oc2PortInterfaces , 30 * time .Second )
432+ Expect (err ).ToNot (HaveOccurred (), "Restore failed: clock state did not return to LOCKED after restoration" )
447433}
448434
449435// waitForOc2PortActivePassive waits for OC 2-port roles to stabilize.
0 commit comments