Skip to content

Commit 2fa523b

Browse files
committed
CI: check clockclass after cloud-event-proxy process restart
Signed-off-by: Jack Ding <jackding@gmail.com>
1 parent 4ce4066 commit 2fa523b

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed

test/conformance/serial/ptp.go

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ const (
6464
var (
	// clockClassPattern is the regular expression source for one
	// openshift_ptp_clock_class metric sample. The config="ptp4l.N.config"
	// label is optional; the capture groups are, in order, the node label,
	// the process label and the integer sample value.
	clockClassPattern = `^openshift_ptp_clock_class\{(?:config="ptp4l\.\d+\.config",)?node="([^"]+)",process="([^"]+)"\}\s+(\d+)`
	// clockClassRe is clockClassPattern compiled once at package scope.
	clockClassRe = regexp.MustCompile(clockClassPattern)

	// pmcClockClassRe captures the decimal value that follows the
	// "gm.ClockClass" field in "pmc GET PARENT_DATA_SET" output.
	pmcClockClassRe = regexp.MustCompile(`gm\.ClockClass\s+(\d+)`)
)
6871
// DesiredMode is the PtpModeDesired value of the desired test configuration
// returned by testconfig.GetDesiredConfig(true). Its lower-cased String() form
// is used to build the label of the serial Describe container.
var DesiredMode = testconfig.GetDesiredConfig(true).PtpModeDesired
6972

@@ -1149,6 +1152,90 @@ var _ = Describe("["+strings.ToLower(DesiredMode.String())+"-serial]", Serial, f
11491152
"Threshold metrics are not detected")
11501153
})
11511154

1155+
It("Should recover clockClass via event API after cloud-event-proxy crash", func() {
	// Scenario: with clockClass 6 reported by metrics, PMC and the event API,
	// kill the cloud-event-proxy process in the sidecar container, wait for it
	// to come back, and check that all three sources still report clockClass 6.
	if ptphelper.PtpEventEnabled() != 2 {
		Skip("Skipping: test applies to event API v2 only")
	}
	if fullConfig.PtpModeDiscovered != testconfig.BoundaryClock &&
		fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClock &&
		fullConfig.PtpModeDiscovered != testconfig.DualNICBoundaryClockHA {
		Skip("Skipping: test applies to boundary clock configurations only")
	}

	By("Deploying consumer app for event API v2")
	nodeName := fullConfig.DiscoveredClockUnderTestPod.Spec.NodeName
	Expect(nodeName).ToNot(BeEmpty(), "clock-under-test pod node is empty")
	err := event.CreateConsumerApp(nodeName)
	if err != nil {
		// A consumer that cannot be deployed skips the spec rather than failing it.
		Skip(fmt.Sprintf("Consumer app setup failed: %v", err))
	}
	DeferCleanup(func() {
		// Best-effort teardown: the namespace deletion error is deliberately ignored.
		_ = event.DeleteConsumerNamespace()
		if event.PubSub != nil {
			event.PubSub.Close()
		}
	})
	// NOTE(review): presumably gives the consumer app time to become ready — confirm.
	time.Sleep(10 * time.Second)

	By("Verifying initial clockClass is 6 via metrics")
	checkClockClassState(fullConfig, strconv.Itoa(int(fbprotocol.ClockClass6)))

	By("Verifying initial clockClass is 6 via PMC")
	checkClockClassViaPMC(fullConfig, strconv.Itoa(int(fbprotocol.ClockClass6)))

	By("Setting up event monitoring and verifying initial clockClass is 6 via Event API")
	event.InitPubSub()
	term, monErr := event.MonitorPodLogsRegex()
	Expect(monErr).ToNot(HaveOccurred(), "could not start listening to events")
	DeferCleanup(func() { stopMonitor(term) })
	verifyClockClassViaEventAPI(int(fbprotocol.ClockClass6), 60*time.Second)

	By("Killing cloud-event-proxy process in sidecar container")
	// The trailing "|| true" makes the shell exit 0 even when pgrep finds
	// nothing or kill fails, so killErr only reflects a failure to exec.
	// NOTE(review): a kill that had no effect is therefore not detected here.
	_, _, killErr := pods.ExecCommand(
		client.Client,
		true,
		fullConfig.DiscoveredClockUnderTestPod,
		pkg.EventProxyContainerName,
		[]string{"sh", "-c", "kill -9 $(pgrep -f ^./cloud-event-proxy) || true"},
	)
	Expect(killErr).ToNot(HaveOccurred(), "failed to kill cloud-event-proxy process")

	By("Waiting for cloud-event-proxy process to restart")
	Eventually(func() bool {
		// Exec errors are intentionally ignored: only non-empty pgrep output
		// counts as "running", and any other outcome simply retries the poll.
		buf, _, _ := pods.ExecCommand(
			client.Client,
			true,
			fullConfig.DiscoveredClockUnderTestPod,
			pkg.EventProxyContainerName,
			[]string{"sh", "-c", "pgrep -f ^./cloud-event-proxy"},
		)
		return strings.TrimSpace(buf.String()) != ""
	}, 3*time.Minute, 1*time.Second).Should(BeTrue(),
		"cloud-event-proxy process did not restart within 3 minutes")

	By("Waiting for cloud-event-proxy health endpoint to recover")
	Eventually(func() string {
		// Exec errors are intentionally ignored: the poll retries until the
		// response body contains "OK" or the timeout expires.
		buf, _, _ := pods.ExecCommand(
			client.Client,
			false,
			fullConfig.DiscoveredClockUnderTestPod,
			pkg.EventProxyContainerName,
			[]string{"curl", path.Join(event.ApiBaseV2, "health")},
		)
		return buf.String()
	}, 2*time.Minute, 2*time.Second).Should(ContainSubstring("OK"),
		"cloud-event-proxy health endpoint did not recover after restart")

	By("Verifying clockClass remains 6 via metrics after cloud-event-proxy restart")
	checkClockClassState(fullConfig, strconv.Itoa(int(fbprotocol.ClockClass6)))

	By("Verifying clockClass remains 6 via PMC after cloud-event-proxy restart")
	checkClockClassViaPMC(fullConfig, strconv.Itoa(int(fbprotocol.ClockClass6)))

	By("Verifying clockClass is 6 via Event API after cloud-event-proxy restart")
	verifyClockClassViaEventAPI(int(fbprotocol.ClockClass6), 90*time.Second)
})
1238+
11521239
Context("Event API version validation", func() {
11531240
BeforeEach(func() {
11541241
if !ptphelper.IsPTPOperatorVersionAtLeast("4.19") {
@@ -2811,6 +2898,30 @@ func checkClockClassState(fullConfig testconfig.TestConfig, expectedState string
28112898
fmt.Sprintf("Expected ptp4l clock class to eventually be %s for GM", expectedState))
28122899
}
28132900

2901+
// checkClockClassViaPMC verifies clock class by running "pmc GET PARENT_DATA_SET"
2902+
// and parsing the gm.ClockClass field from the output.
2903+
func checkClockClassViaPMC(fullConfig testconfig.TestConfig, expectedClockClass string) {
2904+
By(fmt.Sprintf("Verifying gm.ClockClass is %s via PMC PARENT_DATA_SET", expectedClockClass))
2905+
Eventually(func() bool {
2906+
buf, _, err := pods.ExecCommand(client.Client, true,
2907+
fullConfig.DiscoveredClockUnderTestPod, pkg.PtpContainerName,
2908+
[]string{"pmc", "-b", "0", "-u", "-f", "/var/run/ptp4l.0.config", "GET PARENT_DATA_SET"})
2909+
if err != nil {
2910+
fmt.Fprintf(GinkgoWriter, "PMC exec error: %v\n", err)
2911+
return false
2912+
}
2913+
output := buf.String()
2914+
matches := pmcClockClassRe.FindStringSubmatch(output)
2915+
if len(matches) < 2 {
2916+
fmt.Fprintf(GinkgoWriter, "PMC: gm.ClockClass not found in output: %s\n", output)
2917+
return false
2918+
}
2919+
fmt.Fprintf(GinkgoWriter, "PMC: gm.ClockClass=%s (expected %s)\n", matches[1], expectedClockClass)
2920+
return strings.TrimSpace(matches[1]) == expectedClockClass
2921+
}, pkg.TimeoutIn3Minutes, 2*time.Second).Should(BeTrue(),
2922+
fmt.Sprintf("Expected gm.ClockClass %s via PMC but did not get it", expectedClockClass))
2923+
}
2924+
28142925
func checkDPLLFrequencyState(fullConfig testconfig.TestConfig, state string) {
28152926
/*
28162927
# TODO: Revisit this for 2 card as each card will have its own dpll process
@@ -3301,3 +3412,28 @@ func waitForStateAndCC(subs event.Subscriptions, state ptpEvent.SyncState, cc in
33013412
}
33023413
}
33033414
}
3415+
3416+
// verifyClockClassViaEventAPI verifies clock class via Event Fast Notification API getCurrentState
3417+
func verifyClockClassViaEventAPI(expectedClockClass int, timeout time.Duration) {
3418+
const incomingEventsBuffer = 100
3419+
subs, cleanup := event.SubscribeToGMChangeEvents(incomingEventsBuffer, true, timeout)
3420+
defer cleanup()
3421+
3422+
timer := time.NewTimer(timeout)
3423+
defer timer.Stop()
3424+
3425+
for {
3426+
select {
3427+
case <-timer.C:
3428+
Fail(fmt.Sprintf("Timed out waiting for clockClass %d via Event API", expectedClockClass))
3429+
return
3430+
case ev := <-subs.CLOCKCLASS:
3431+
if res, ok := processEvent(ptpEvent.PtpClockClassChange, ev); ok {
3432+
if v, ok2 := res.Values["metric"].(float64); ok2 && int(v) == expectedClockClass {
3433+
fmt.Fprintf(GinkgoWriter, "ClockClass %d verified via Event API\n", expectedClockClass)
3434+
return
3435+
}
3436+
}
3437+
}
3438+
}
3439+
}

0 commit comments

Comments
 (0)