Skip to content

Commit 0c0b34d

Browse files
committed
tests: Add E2E test for MAC collision alert
Adds a new test that verifies the KubemacpoolMACCollisionDetected alert fires when MAC collisions exist and clears when they are resolved. The test creates two sets of colliding VMIs, verifies the alert fires, then removes VMIs one by one to confirm the alert persists with partial resolution and clears only when all collisions are gone. So that the test can read the alert state, the Prometheus resource is patched so that its ruleSelector and serviceMonitorSelector match kubemacpool's PrometheusRule and ServiceMonitor, and the prometheus-k8s statefulset is then restarted to pick up the change. Signed-off-by: Ram Lavi <ralavi@redhat.com>
1 parent e533356 commit 0c0b34d

File tree

2 files changed

+215
-0
lines changed

2 files changed

+215
-0
lines changed

tests/alert_collision_test.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
/*
2+
Copyright 2025 The KubeMacPool Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package tests
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"math/rand"
23+
"os/exec"
24+
"time"
25+
26+
. "github.com/onsi/ginkgo/v2"
27+
. "github.com/onsi/gomega"
28+
29+
apierrors "k8s.io/apimachinery/pkg/api/errors"
30+
kubevirtv1 "kubevirt.io/api/core/v1"
31+
"sigs.k8s.io/controller-runtime/pkg/client"
32+
33+
"github.com/k8snetworkplumbingwg/kubemacpool/tests/kubectl"
34+
)
35+
36+
// Constants shared by the MAC collision alert spec and its alert-polling helpers.
const (
37+
// CollisionAlertsLabel is the Ginkgo label applied to the "MAC Collision Alerts" container.
CollisionAlertsLabel = "collision-alerts"
38+
// collisionAlertName is the name of the alert whose firing state the spec polls.
collisionAlertName = "KubemacpoolMACCollisionDetected"
39+
40+
// alertTimeout bounds how long the helpers wait for the alert to reach the expected state.
alertTimeout = 2 * time.Minute
41+
// alertPollingInterval is the delay between consecutive alert state checks.
alertPollingInterval = 5 * time.Second
42+
)
43+
44+
// portForwardCmd holds the port-forward command started in BeforeAll so that AfterAll can kill it.
var portForwardCmd *exec.Cmd
45+
46+
var _ = Describe("MAC Collision Alerts", Label(CollisionAlertsLabel), Serial, Ordered, func() {
47+
var (
48+
nadName1, nadName2 string
49+
prometheusClient *PromClient
50+
)
51+
52+
BeforeAll(func() {
53+
By("Patching Prometheus and restarting to pick up kubemacpool configuration")
54+
Expect(patchPrometheusForKubemacpool()).To(Succeed())
55+
56+
nadName1 = randName("alert-br1")
57+
nadName2 = randName("alert-br2")
58+
By(fmt.Sprintf("Creating network attachment definitions: %s, %s", nadName1, nadName2))
59+
Expect(createNetworkAttachmentDefinition(TestNamespace, nadName1)).To(Succeed())
60+
Expect(createNetworkAttachmentDefinition(TestNamespace, nadName2)).To(Succeed())
61+
62+
// Setup port forwarding to Prometheus
63+
sourcePort := 4321 + rand.Intn(6000) // #nosec G404 -- weak random is fine for test port selection
64+
targetPort := 9090
65+
By(fmt.Sprintf("Setting up port forwarding to Prometheus API on port %d", sourcePort))
66+
67+
var err error
68+
portForwardCmd, err = kubectl.StartPortForwardCommand(prometheusMonitoringNamespace, "prometheus-k8s-0", sourcePort, targetPort)
69+
Expect(err).ToNot(HaveOccurred())
70+
71+
prometheusClient = NewPromClient(sourcePort, prometheusMonitoringNamespace)
72+
})
73+
74+
AfterAll(func() {
75+
By("Removing port-forwarding command")
76+
Expect(kubectl.KillPortForwardCommand(portForwardCmd)).To(Succeed())
77+
78+
By("Deleting network attachment definitions")
79+
Expect(deleteNetworkAttachmentDefinition(TestNamespace, nadName1)).To(Succeed())
80+
Expect(deleteNetworkAttachmentDefinition(TestNamespace, nadName2)).To(Succeed())
81+
})
82+
83+
AfterEach(func() {
84+
vmiList := &kubevirtv1.VirtualMachineInstanceList{}
85+
Expect(testClient.CRClient.List(context.TODO(), vmiList)).To(Succeed())
86+
87+
for i := range vmiList.Items {
88+
vmiObject := &vmiList.Items[i]
89+
err := testClient.CRClient.Delete(context.TODO(), vmiObject)
90+
if err != nil && !apierrors.IsNotFound(err) {
91+
Expect(err).ToNot(HaveOccurred())
92+
}
93+
}
94+
95+
Eventually(func() []kubevirtv1.VirtualMachineInstance {
96+
vmiList := &kubevirtv1.VirtualMachineInstanceList{}
97+
Expect(testClient.CRClient.List(context.TODO(), vmiList)).To(Succeed())
98+
return vmiList.Items
99+
}).WithTimeout(timeout).WithPolling(pollingInterval).Should(HaveLen(0), "failed to remove all VMI objects")
100+
})
101+
102+
It("should trigger alert when collisions exist and clear when resolved", func() {
103+
const (
104+
mac1 = "02:00:00:00:aa:01"
105+
mac2 = "02:00:00:00:aa:02"
106+
)
107+
108+
By("Step 1: Verifying alert is not firing initially")
109+
expectAlertNotFiring(prometheusClient, collisionAlertName)
110+
111+
By("Step 2: Creating 2 sets of VMIs colliding on 2 different MACs")
112+
// Set 1: vmi1a and vmi1b collide on mac1
113+
vmi1a := NewVMI(TestNamespace, "test-alert-1a",
114+
WithInterface(newInterface(nadName1, mac1)),
115+
WithNetwork(newNetwork(nadName1)))
116+
Expect(testClient.CRClient.Create(context.TODO(), vmi1a)).To(Succeed())
117+
118+
vmi1b := NewVMI(TestNamespace, "test-alert-1b",
119+
WithInterface(newInterface(nadName2, mac1)),
120+
WithNetwork(newNetwork(nadName2)))
121+
Expect(testClient.CRClient.Create(context.TODO(), vmi1b)).To(Succeed())
122+
123+
// Set 2: vmi2a and vmi2b collide on mac2
124+
vmi2a := NewVMI(TestNamespace, "test-alert-2a",
125+
WithInterface(newInterface(nadName1, mac2)),
126+
WithNetwork(newNetwork(nadName1)))
127+
Expect(testClient.CRClient.Create(context.TODO(), vmi2a)).To(Succeed())
128+
129+
vmi2b := NewVMI(TestNamespace, "test-alert-2b",
130+
WithInterface(newInterface(nadName2, mac2)),
131+
WithNetwork(newNetwork(nadName2)))
132+
Expect(testClient.CRClient.Create(context.TODO(), vmi2b)).To(Succeed())
133+
134+
waitForVMIsRunning([]vmiReference{
135+
{vmi1a.Namespace, vmi1a.Name},
136+
{vmi1b.Namespace, vmi1b.Name},
137+
{vmi2a.Namespace, vmi2a.Name},
138+
{vmi2b.Namespace, vmi2b.Name},
139+
})
140+
141+
By("Step 3: Verifying alert is firing (2 collisions exist)")
142+
expectAlertFiring(prometheusClient, collisionAlertName)
143+
144+
By("Step 4: Removing 1 VMI from first collision set (mac1 collision cleared)")
145+
Expect(testClient.CRClient.Delete(context.TODO(), vmi1a)).To(Succeed())
146+
Eventually(func() bool {
147+
return apierrors.IsNotFound(testClient.CRClient.Get(context.TODO(), client.ObjectKey{
148+
Namespace: vmi1a.Namespace,
149+
Name: vmi1a.Name,
150+
}, &kubevirtv1.VirtualMachineInstance{}))
151+
}).WithTimeout(timeout).WithPolling(pollingInterval).Should(BeTrue())
152+
153+
By("Step 5: Verifying alert is still firing (mac2 collision still exists)")
154+
expectAlertFiring(prometheusClient, collisionAlertName)
155+
156+
By("Step 6: Removing 1 VMI from second collision set (all collisions cleared)")
157+
Expect(testClient.CRClient.Delete(context.TODO(), vmi2a)).To(Succeed())
158+
Eventually(func() bool {
159+
return apierrors.IsNotFound(testClient.CRClient.Get(context.TODO(), client.ObjectKey{
160+
Namespace: vmi2a.Namespace,
161+
Name: vmi2a.Name,
162+
}, &kubevirtv1.VirtualMachineInstance{}))
163+
}).WithTimeout(timeout).WithPolling(pollingInterval).Should(BeTrue())
164+
165+
By("Step 7: Verifying alert is no longer firing")
166+
expectAlertNotFiring(prometheusClient, collisionAlertName)
167+
})
168+
})
169+
170+
// expectAlertFiring waits for the specified alert to be firing
171+
func expectAlertFiring(p *PromClient, alertName string) {
172+
By(fmt.Sprintf("Waiting for alert %s to fire", alertName))
173+
Eventually(func() bool { return p.IsAlertFiring(alertName) }).
174+
WithTimeout(alertTimeout).WithPolling(alertPollingInterval).Should(BeTrue(),
175+
fmt.Sprintf("alert %s should be firing", alertName))
176+
}
177+
178+
// expectAlertNotFiring waits for the specified alert to stop firing
179+
func expectAlertNotFiring(p *PromClient, alertName string) {
180+
By(fmt.Sprintf("Waiting for alert %s to stop firing", alertName))
181+
Eventually(func() bool { return p.IsAlertFiring(alertName) }).
182+
WithTimeout(alertTimeout).WithPolling(alertPollingInterval).Should(BeFalse(),
183+
fmt.Sprintf("alert %s should not be firing", alertName))
184+
}

tests/metrics.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,34 @@ func (p *PromClient) IsAlertFiring(alertName string) bool {
223223
}
224224
return alert != nil && alert.State == promApiv1.AlertStateFiring
225225
}
226+
227+
// patchPrometheusForKubemacpool patches the Prometheus instance to include kubemacpool PrometheusRules and ServiceMonitors,
228+
// then restarts the Prometheus pod to pick up the new configuration and RBAC permissions.
229+
func patchPrometheusForKubemacpool() error {
230+
// Patch ruleSelector to load our PrometheusRule
231+
_, stderr, err := kubectl.Kubectl("patch", "prometheus", "k8s", "-n", prometheusMonitoringNamespace, "--type=json", "-p",
232+
`[{"op": "replace", "path": "/spec/ruleSelector", "value":{"matchLabels": {"prometheus.kubemacpool.io": "true"}}}]`)
233+
if err != nil {
234+
return fmt.Errorf("failed to patch ruleSelector: %s: %w", stderr, err)
235+
}
236+
237+
// Patch serviceMonitorSelector to scrape our metrics
238+
_, stderr, err = kubectl.Kubectl("patch", "prometheus", "k8s", "-n", prometheusMonitoringNamespace, "--type=json", "-p",
239+
`[{"op": "replace", "path": "/spec/serviceMonitorSelector", "value":{"matchLabels": {"prometheus.kubemacpool.io": "true"}}}]`)
240+
if err != nil {
241+
return fmt.Errorf("failed to patch serviceMonitorSelector: %s: %w", stderr, err)
242+
}
243+
244+
// Restart Prometheus to pick up new configuration and RBAC permissions
245+
_, stderr, err = kubectl.Kubectl("rollout", "restart", "statefulset/prometheus-k8s", "-n", prometheusMonitoringNamespace)
246+
if err != nil {
247+
return fmt.Errorf("failed to rollout prometheus: %s: %w", stderr, err)
248+
}
249+
250+
_, stderr, err = kubectl.Kubectl("rollout", "status", "statefulset/prometheus-k8s", "-n", prometheusMonitoringNamespace, "--timeout=120s")
251+
if err != nil {
252+
return fmt.Errorf("failed to wait for prometheus rollout: %s: %w", stderr, err)
253+
}
254+
255+
return nil
256+
}

0 commit comments

Comments
 (0)