Skip to content

Commit 28b5fd2

Browse files
committed
cnf ran ptp: make OC cases events-optional
This commit uses the eventmetric package to fallback to metric assertions whenever PTP events are not enabled. Additionally, it flips the semantics of CurrentState. By default, the CurrentState logs are excluded since they don't represent events being sent to the consumer. They may be explicitly enabled when desired. There are a few complexities still, such as some assertions needing to be events-only due to the metric being too transient to capture. Additionally, the tests will now configure the ServiceMonitor such that Prometheus scrapes the metrics every second instead of the default 30 seconds. This improves the precision of metric assertions. Assisted-by: Cursor
1 parent 347d50f commit 28b5fd2

12 files changed

Lines changed: 286 additions & 134 deletions

File tree

tests/cnf/ran/ptp/internal/consumer/bothversions.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ func getEventAPIVersion(client *clients.Settings) (ptpEventAPIVersion, error) {
118118
return eventAPIVersionV1, nil
119119
case string(eventAPIVersionV2):
120120
return eventAPIVersionV2, nil
121+
case "":
122+
return eventAPIVersionV2, nil
121123
default:
122124
return "", fmt.Errorf("unknown event API version %s in PTP operator config", apiVersion)
123125
}

tests/cnf/ran/ptp/internal/eventmetric/eventmetric.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@ import (
3737
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/consumer"
3838
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/events"
3939
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/metrics"
40+
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/tsparams"
4041
"golang.org/x/exp/constraints"
42+
"k8s.io/klog/v2"
4143
)
4244

4345
// AssertConfig is a struct that contains the configuration for the assertion. It combines the possible inputs to
@@ -155,6 +157,17 @@ func (assertConfig *AssertConfig[V]) ExecuteAssertion(ctx context.Context) error
155157
return fmt.Errorf("failed to check if events are enabled: %w", err)
156158
}
157159

160+
klog.V(tsparams.LogLevel).Infof(
161+
"Starting combined event/metric assertion: node=%s eventsEnabled=%t startTime=%s timeout=%s query=%s "+
162+
"expectedMetricValue=%d",
163+
assertConfig.NodeName,
164+
eventsEnabled,
165+
startTime,
166+
assertConfig.Timeout,
167+
assertConfig.MetricQuery.ToMetricQuery().String(),
168+
int64(assertConfig.ExpectedMetricValue),
169+
)
170+
158171
errChan := make(chan error, 2)
159172
waitGroup := sync.WaitGroup{}
160173

@@ -168,8 +181,14 @@ func (assertConfig *AssertConfig[V]) ExecuteAssertion(ctx context.Context) error
168181
err := events.WaitForEvent(
169182
eventPod, startTime, assertConfig.Timeout, assertConfig.EventFilter, assertConfig.EventOptions...)
170183
if err != nil {
184+
klog.V(tsparams.LogLevel).Infof("Event assertion failed for node %s: %v", assertConfig.NodeName, err)
185+
171186
errChan <- fmt.Errorf("event assertion failed: %w", err)
187+
188+
return
172189
}
190+
191+
klog.V(tsparams.LogLevel).Infof("Event assertion passed for node %s", assertConfig.NodeName)
173192
})
174193
}
175194

@@ -187,8 +206,19 @@ func (assertConfig *AssertConfig[V]) ExecuteAssertion(ctx context.Context) error
187206
metricOptions...,
188207
)
189208
if err != nil {
209+
klog.V(tsparams.LogLevel).Infof("Metric assertion failed for node %s: %v", assertConfig.NodeName, err)
210+
190211
errChan <- fmt.Errorf("metric assertion failed: %w", err)
212+
213+
return
191214
}
215+
216+
klog.V(tsparams.LogLevel).Infof(
217+
"Metric assertion passed for node %s with query %s and expected value %d",
218+
assertConfig.NodeName,
219+
assertConfig.MetricQuery.ToMetricQuery().String(),
220+
int64(assertConfig.ExpectedMetricValue),
221+
)
192222
})
193223

194224
waitGroup.Wait()

tests/cnf/ran/ptp/internal/events/README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ The `WaitForEvent` function accepts several optional parameters:
3333

3434
Specifies the container name within the pod from which to retrieve logs. If not specified, logs are retrieved from the default container. This is particularly useful when monitoring PTP events from specific containers like `"cloud-event-proxy"`.
3535

36-
#### `WithoutCurrentState(ignoreCurrentState bool)`
36+
#### `WithCurrentState(includeCurrentState bool)`
3737

38-
Controls whether to ignore messages about the current state of events. When set to `true`, only events received as subscriptions are considered, filtering out initial state reports. This is useful when you want to wait for new events rather than existing state information.
38+
Controls whether to include messages about the current state of events. By default, current state messages are not included.
3939

4040
### Event Filtering
4141

@@ -112,14 +112,13 @@ func main() {
112112

113113
fmt.Printf("Waiting for PTP Sync State Locked event on interface %s...\n", targetInterface)
114114

115-
// Use both WithContainer and WithoutCurrentState options
115+
// Use the container option.
116116
err := events.WaitForEvent(
117117
myPtpPod,
118118
startTime,
119119
timeout,
120120
eventFilter,
121121
events.WithContainer("cloud-event-proxy"),
122-
events.WithoutCurrentState(true), // Only wait for new events, not current state
123122
)
124123
if err != nil {
125124
fmt.Printf("Error waiting for event: %v\n", err)

tests/cnf/ran/ptp/internal/events/events.go

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ var (
3131
// waitForEventOptions is a struct that holds options for the WaitForEvent function. Options will update this struct and
3232
// the final result is used to configure the WaitForEvent function.
3333
type waitForEventOptions struct {
34-
container string
35-
ignoreCurrentState bool
34+
container string
35+
includeCurrentState bool
3636
}
3737

3838
// WaitForEventOption is a function that modifies the waitForEventOptions struct. It is used to set options for the
@@ -47,11 +47,11 @@ func WithContainer(container string) WaitForEventOption {
4747
}
4848
}
4949

50-
// WithoutCurrentState is an option for the WaitForEvent function that specifies whether to ignore messages about the
51-
// current state of events. This allows for checking only events that are received as a subscription.
52-
func WithoutCurrentState(ignoreCurrentState bool) WaitForEventOption {
50+
// WithCurrentState is an option for the WaitForEvent function that specifies whether to include current state messages
51+
// while extracting events from logs.
52+
func WithCurrentState(includeCurrentState bool) WaitForEventOption {
5353
return func(options *waitForEventOptions) {
54-
options.ignoreCurrentState = ignoreCurrentState
54+
options.includeCurrentState = includeCurrentState
5555
}
5656
}
5757

@@ -71,6 +71,15 @@ func WaitForEvent(
7171
option(&combinedOptions)
7272
}
7373

74+
klog.V(tsparams.LogLevel).Infof(
75+
"Waiting for event: startTime=%s timeout=%s container=%q includeCurrentState=%t filter=%#v",
76+
startTime,
77+
timeout,
78+
combinedOptions.container,
79+
combinedOptions.includeCurrentState,
80+
filter,
81+
)
82+
7483
return wait.PollUntilContextTimeout(
7584
context.TODO(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
7685
// Each loop we save the previous start time and set the new start time to the current time.
@@ -91,24 +100,42 @@ func WaitForEvent(
91100

92101
klog.V(tsparams.LogLevel).Infof("Logs: %s", string(logs))
93102

94-
extractedEvents := extractEventsFromLogs(logs, combinedOptions.ignoreCurrentState)
103+
totalEventLikeLines := len(containsEventRegexp.FindAll(logs, -1))
104+
105+
nonCurrentStateEventLines := len(containsEventNotStateRegexp.FindAll(logs, -1))
106+
if !combinedOptions.includeCurrentState && totalEventLikeLines > nonCurrentStateEventLines {
107+
klog.V(tsparams.LogLevel).Infof(
108+
"Ignored %d CurrentState event log lines (includeCurrentState=%t)",
109+
totalEventLikeLines-nonCurrentStateEventLines,
110+
combinedOptions.includeCurrentState,
111+
)
112+
}
113+
114+
extractedEvents := extractEventsFromLogs(logs, combinedOptions.includeCurrentState)
95115

96116
klog.V(tsparams.LogLevel).Infof("Extracted events: %#v\nFilter: %#v", extractedEvents, filter)
97117

98-
return slices.ContainsFunc(extractedEvents, filter.Filter), nil
118+
matchingEventIndex := slices.IndexFunc(extractedEvents, filter.Filter)
119+
if matchingEventIndex != -1 {
120+
klog.V(tsparams.LogLevel).Infof("Matched event: %#v", extractedEvents[matchingEventIndex])
121+
122+
return true, nil
123+
}
124+
125+
return false, nil
99126
})
100127
}
101128

102129
// extractEventsFromLogs extracts events from the logs of either the cloud event consumer or the cloud event proxy
103130
// containers. Rather than return errors, this function logs them and ignores the line. All lines that were able to be
104131
// parsed into events are returned.
105-
func extractEventsFromLogs(logs []byte, ignoreCurrentState bool) []event.Event {
132+
func extractEventsFromLogs(logs []byte, includeCurrentState bool) []event.Event {
106133
var extractedEvents []event.Event
107134

108135
for line := range bytes.Lines(logs) {
109-
matcher := containsEventRegexp
110-
if ignoreCurrentState {
111-
matcher = containsEventNotStateRegexp
136+
matcher := containsEventNotStateRegexp
137+
if includeCurrentState {
138+
matcher = containsEventRegexp
112139
}
113140

114141
if !matcher.Match(line) {

tests/cnf/ran/ptp/internal/metrics/assert.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,17 @@ func AssertQuery[V constraints.Integer](
139139
option(opts)
140140
}
141141

142+
metricQuery := query.ToMetricQuery()
143+
klog.V(tsparams.LogLevel).Infof(
144+
"Starting query assertion: query=%s expected=%d startTime=%s timeout=%s pollInterval=%s stableDuration=%s",
145+
metricQuery.String(),
146+
int64(expected),
147+
opts.startTime,
148+
opts.timeout,
149+
opts.pollInterval,
150+
opts.stableDuration,
151+
)
152+
142153
// queryTime is the time at which each query is executed. It begins as the start time and will be incremented by
143154
// the poll interval until the timeout is reached.
144155
queryTime := opts.startTime
@@ -189,6 +200,16 @@ func AssertQuery[V constraints.Integer](
189200
}
190201
}
191202

203+
klog.V(tsparams.LogLevel).Infof(
204+
"Query assertion timed out: query=%s expected=%d startTime=%s timeout=%s pollInterval=%s stableDuration=%s",
205+
metricQuery.String(),
206+
int64(expected),
207+
opts.startTime,
208+
opts.timeout,
209+
opts.pollInterval,
210+
opts.stableDuration,
211+
)
212+
192213
return fmt.Errorf("failed to assert query eventually: timeout of %s exceeded", opts.timeout)
193214
}
194215

tests/cnf/ran/ptp/ptp_suite_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ var _ = AfterSuite(func() {
6363
By("cleaning up Prometheus API client resources")
6464
err = querier.CleanupQuerierResources(RANConfig.Spoke1APIClient)
6565
Expect(err).ToNot(HaveOccurred(), "Failed to cleanup Prometheus API client resources")
66-
6766
})
6867

6968
var _ = JustAfterEach(func() {

tests/cnf/ran/ptp/tests/ptp-event-consumer.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/internal/ranparam"
1818
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/internal/version"
1919
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/consumer"
20+
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/eventmetric"
2021
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/events"
2122
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/iface"
2223
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/metrics"
@@ -107,15 +108,19 @@ var _ = Describe("PTP Event Consumer", Label(tsparams.LabelEventConsumer), func(
107108
metrics.AssertWithTimeout(10*time.Minute))
108109
Expect(err).ToNot(HaveOccurred(), "Failed to assert clock state is locked and stable after pod restart")
109110

110-
By("waiting up to 10 minutes since startTime for the locked event on the node")
111-
eventPod, err := consumer.GetConsumerPodforNode(RANConfig.Spoke1APIClient, nodeInfo.Name)
112-
Expect(err).ToNot(HaveOccurred(), "Failed to get event pod for node %s", nodeInfo.Name)
113-
114-
filter := events.All(
111+
By("waiting up to 10 minutes since startTime for the locked event and metric on the node")
112+
lockedFilter := events.All(
115113
events.IsType(eventptp.PtpStateChange),
116114
events.HasValue(events.WithSyncState(eventptp.LOCKED)),
117115
)
118-
err = events.WaitForEvent(eventPod, startTime, 10*time.Minute, filter)
116+
err = eventmetric.NewAssertion(prometheusAPI,
117+
metrics.ClockStateQuery{Process: metrics.DoesNotEqual(metrics.ProcessChronyd)},
118+
metrics.ClockStateLocked, lockedFilter).
119+
ForNode(RANConfig.Spoke1APIClient, nodeInfo.Name).
120+
WithStartTime(startTime).
121+
WithTimeout(10 * time.Minute).
122+
WithMetricOptions(metrics.AssertWithStableDuration(10 * time.Second)).
123+
ExecuteAssertion(context.TODO())
119124
Expect(err).ToNot(HaveOccurred(), "Failed to wait for locked event on node %s", nodeInfo.Name)
120125
}
121126

@@ -126,6 +131,14 @@ var _ = Describe("PTP Event Consumer", Label(tsparams.LabelEventConsumer), func(
126131

127132
// 82218 - Validates the consumer events after ptpoperatorconfig api version is modified
128133
It("validates the consumer events after ptpoperatorconfig api version is modified", reportxml.ID("82218"), func() {
134+
By("checking if events are enabled")
135+
eventsEnabled, err := consumer.AreEventsEnabled(RANConfig.Spoke1APIClient)
136+
Expect(err).ToNot(HaveOccurred(), "Failed to check if events are enabled")
137+
138+
if !eventsEnabled {
139+
Skip("Events are not enabled, skipping event consumer API version test")
140+
}
141+
129142
By("checking if the PTP version is within the 4.16-4.18 range")
130143
inRange, err := version.IsVersionStringInRange(RANConfig.Spoke1OperatorVersions[ranparam.PTP], "4.16", "4.18")
131144
Expect(err).ToNot(HaveOccurred(), "Failed to check PTP version range")

tests/cnf/ran/ptp/tests/ptp-events-and-metrics.go

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/internal/nicinfo"
1515
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/internal/querier"
1616
. "github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/internal/raninittools"
17-
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/consumer"
17+
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/eventmetric"
1818
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/events"
1919
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/iface"
2020
"github.com/rh-ecosystem-edge/eco-gotests/tests/cnf/ran/ptp/internal/metrics"
@@ -100,30 +100,47 @@ var _ = Describe("PTP Events and Metrics", Label(tsparams.LabelEventsAndMetrics)
100100
continue
101101
}
102102

103-
testRanAtLeastOnce = true
104103
ifaceGroups := iface.GroupInterfacesByNIC(profiles.GetInterfacesNames(clientInterfaces))
105104

106-
By("getting the event pod for the node")
107-
eventPod, err := consumer.GetConsumerPodforNode(RANConfig.Spoke1APIClient, nodeInfo.Name)
108-
Expect(err).ToNot(HaveOccurred(), "Failed to get event pod for node %s", nodeInfo.Name)
105+
By("getting the egress interface for the node")
106+
egressInterface, err := iface.GetEgressInterfaceName(RANConfig.Spoke1APIClient, nodeInfo.Name)
107+
Expect(err).ToNot(HaveOccurred(), "Failed to get egress interface name for node %s", nodeInfo.Name)
109108

110109
for nic, ifaces := range ifaceGroups {
110+
if nic == egressInterface.GetNIC() {
111+
klog.V(tsparams.LogLevel).Infof("Skipping egress NIC %s", nic)
112+
113+
continue
114+
}
115+
116+
testRanAtLeastOnce = true
117+
111118
// Include this interface in the interface information report for this suite.
112119
nicinfo.Node(nodeInfo.Name).MarkTested(string(ifaces[0]))
113120

114121
startTime := time.Now()
115122

116-
By("adjusting the PHC by 5 ms for NIC " + string(nic))
117-
err := iface.AdjustPTPHardwareClock(RANConfig.Spoke1APIClient, nodeInfo.Name, ifaces[0], 0.005)
123+
By("adjusting the PHC by 100 ms for NIC " + string(nic))
124+
err := iface.AdjustPTPHardwareClock(RANConfig.Spoke1APIClient, nodeInfo.Name, ifaces[0], 0.1)
118125
Expect(err).ToNot(HaveOccurred(),
119126
"Failed to adjust PTP hardware clock for interface %s on node %s", ifaces[0], nodeInfo.Name)
120127

121-
By("waiting to receive a FREERUN event")
122-
filter := events.All(
128+
By("waiting to receive a FREERUN event and metric")
129+
freerunFilter := events.All(
123130
events.IsType(eventptp.PtpStateChange),
124-
events.HasValue(events.WithSyncState(eventptp.FREERUN)),
131+
events.HasValue(events.WithSyncState(eventptp.FREERUN), events.OnInterface(nic)),
125132
)
126-
err = events.WaitForEvent(eventPod, startTime, 5*time.Minute, filter, events.WithoutCurrentState(true))
133+
ptp4lClockStateQuery := metrics.ClockStateQuery{
134+
Node: metrics.Equals(nodeInfo.Name),
135+
Interface: metrics.Equals(nic),
136+
Process: metrics.Equals(metrics.ProcessPTP4L),
137+
}
138+
err = eventmetric.NewAssertion(prometheusAPI, ptp4lClockStateQuery, metrics.ClockStateFreerun, freerunFilter).
139+
ForNode(RANConfig.Spoke1APIClient, nodeInfo.Name).
140+
WithStartTime(startTime).
141+
WithTimeout(5 * time.Minute).
142+
WithMetricOptions(metrics.AssertWithPollInterval(1 * time.Second)).
143+
ExecuteAssertion(context.TODO())
127144
Expect(err).ToNot(HaveOccurred(),
128145
"Failed to wait for free run event on interface %s on node %s", ifaces[0], nodeInfo.Name)
129146

@@ -137,12 +154,16 @@ var _ = Describe("PTP Events and Metrics", Label(tsparams.LabelEventsAndMetrics)
137154
// The locked event should happen much sooner than in 15 minutes, except for GM
138155
// profiles. This is since the RDS's ts2phc settings for the servo do not allow the PHC
139156
// to be updated as quickly. The ptp4l settings allow it to be updated much faster.
140-
By("waiting to receive a locked event")
141-
filter = events.All(
157+
By("waiting to receive a locked event and metric")
158+
lockedFilter := events.All(
142159
events.IsType(eventptp.PtpStateChange),
143-
events.HasValue(events.WithSyncState(eventptp.LOCKED)),
160+
events.HasValue(events.WithSyncState(eventptp.LOCKED), events.OnInterface(nic)),
144161
)
145-
err = events.WaitForEvent(eventPod, startTime, 15*time.Minute, filter, events.WithoutCurrentState(true))
162+
err = eventmetric.NewAssertion(prometheusAPI, ptp4lClockStateQuery, metrics.ClockStateLocked, lockedFilter).
163+
ForNode(RANConfig.Spoke1APIClient, nodeInfo.Name).
164+
WithStartTime(startTime).
165+
WithTimeout(15 * time.Minute).
166+
ExecuteAssertion(context.TODO())
146167
Expect(err).ToNot(HaveOccurred(),
147168
"Failed to wait for locked event on interface %s on node %s", ifaces[0], nodeInfo.Name)
148169
}

0 commit comments

Comments
 (0)