Skip to content

Commit 7092317

Browse files
mtuliokmala
authored and committed
e2e/debug: increase data collection on e2e failures
1 parent 07f68c1 commit 7092317

File tree

1 file changed

+117
-1
lines changed

1 file changed

+117
-1
lines changed

tests/e2e/loadbalancer.go

Lines changed: 117 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,23 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() {
307307

308308
By("creating backend server pods")
309309
_, err = e2e.LBJig.Run(e2e.buildReplicationController(tc.requireAffinity))
310-
framework.ExpectNoError(err)
310+
if err != nil {
311+
serviceName := e2e.LBJig.Name
312+
if e2e.svc != nil {
313+
serviceName = e2e.svc.Name
314+
}
315+
framework.Logf("ERROR: LoadBalancer provisioning failed for service %q: %v", serviceName, err)
316+
framework.Logf("ERROR: LoadBalancer provisioning timeout reached after %v", loadBalancerCreateTimeout)
317+
318+
// Ensure we have detailed debugging information before failing
319+
framework.Logf("=== LoadBalancer Provisioning Failure Debug Information ===")
320+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
321+
framework.Logf("=== End of LoadBalancer Provisioning Failure Debug Information ===")
322+
323+
// Fail the test immediately to prevent further execution
324+
framework.ExpectNoError(err, "LoadBalancer provisioning failed - check debug information above")
325+
}
326+
311327
framework.Logf("[K8S] Backend pods created, affinity required: %t", tc.requireAffinity)
312328

313329
if tc.hookPostServiceCreate != nil {
@@ -323,16 +339,32 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() {
323339
framework.Failf("Service is nil after LoadBalancer provisioning for service %s", e2e.LBJig.Name)
324340
}
325341
if len(e2e.svc.Spec.Ports) == 0 {
342+
framework.Logf("=== Service Ports Error Debug Information ===")
343+
framework.Logf("Service spec: %+v", e2e.svc.Spec)
344+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
345+
framework.Logf("=== End of Service Ports Error Debug Information ===")
326346
framework.Failf("No ports found in service spec for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
327347
}
328348
if len(e2e.svc.Status.LoadBalancer.Ingress) == 0 {
349+
framework.Logf("=== LoadBalancer Ingress Error Debug Information ===")
350+
framework.Logf("Service status: %+v", e2e.svc.Status)
351+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
352+
framework.Logf("=== End of LoadBalancer Ingress Error Debug Information ===")
329353
framework.Failf("No ingress found in LoadBalancer status for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
330354
}
331355

332356
svcPort := int(e2e.svc.Spec.Ports[0].Port)
333357
ingressAddress := e2eservice.GetIngressPoint(&e2e.svc.Status.LoadBalancer.Ingress[0])
334358
framework.Logf("[LB-INFO] Ingress address: %s, port: %d", ingressAddress, svcPort)
335359

360+
if ingressAddress == "" {
361+
framework.Logf("=== Empty Ingress Address Debug Information ===")
362+
framework.Logf("LoadBalancer ingress[0]: %+v", e2e.svc.Status.LoadBalancer.Ingress[0])
363+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
364+
framework.Logf("=== End of Empty Ingress Address Debug Information ===")
365+
framework.Failf("LoadBalancer ingress address is empty for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
366+
}
367+
336368
if tc.hookPreTest != nil {
337369
By("executing pre-test hook")
338370
tc.hookPreTest(e2e)
@@ -840,3 +872,87 @@ func inClusterTestReachableHTTP(cs clientset.Interface, namespace, nodeName, tar
840872

841873
return nil
842874
}
875+
876+
// Gather information from the cluster to help debug failures.
877+
// - Resource events
878+
// - All namespace events
879+
// - Cloud controller manager logs
880+
// - Service status
881+
func gatherResourceEvents(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
882+
framework.Logf("=== Collecting resource events for debugging ===")
883+
events, err := cs.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
884+
FieldSelector: "involvedObject.name=" + resourceName,
885+
})
886+
if err != nil {
887+
framework.Logf("Error getting events for resource %q: %v", resourceName, err)
888+
} else {
889+
framework.Logf("Resource events for %q:", resourceName)
890+
for _, event := range events.Items {
891+
framework.Logf(" [%s] %s/%s: %s - %s", event.Type, event.Reason, event.InvolvedObject.Name, event.Message, event.FirstTimestamp)
892+
}
893+
}
894+
}
895+
896+
func gatherAllEvents(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
897+
framework.Logf("=== Collecting all namespace events ===")
898+
allEvents, err := cs.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{})
899+
if err != nil {
900+
framework.Logf("Error getting all namespace events: %v", err)
901+
} else {
902+
framework.Logf("All events in namespace %q:", namespace)
903+
for _, event := range allEvents.Items {
904+
if strings.Contains(event.Message, "loadbalancer") || strings.Contains(event.Message, "LoadBalancer") ||
905+
strings.Contains(event.Reason, "LoadBalancer") || strings.Contains(event.Source.Component, "cloud-controller-manager") {
906+
framework.Logf(" [%s] %s/%s/%s: %s - %s", event.Type, event.Source.Component, event.Reason, event.InvolvedObject.Name, event.Message, event.FirstTimestamp)
907+
}
908+
}
909+
}
910+
}
911+
912+
func gatherControllerLogs(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
913+
framework.Logf("=== Collecting cloud controller manager logs ===")
914+
ccmPods, err := cs.CoreV1().Pods("").List(ctx, metav1.ListOptions{
915+
LabelSelector: "app=cloud-controller-manager",
916+
})
917+
if err != nil {
918+
framework.Logf("Error listing cloud controller manager pods: %v", err)
919+
} else {
920+
for _, pod := range ccmPods.Items {
921+
framework.Logf("Found CCM pod: %s/%s (phase: %s)", pod.Namespace, pod.Name, pod.Status.Phase)
922+
923+
// Get recent logs (last 50 lines)
924+
tailLines := int64(50)
925+
logOpts := &v1.PodLogOptions{
926+
TailLines: &tailLines,
927+
Previous: false,
928+
}
929+
logs, err1 := cs.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOpts).DoRaw(ctx)
930+
if err1 != nil {
931+
framework.Logf("Error getting logs for CCM pod %s/%s: %v", pod.Namespace, pod.Name, err)
932+
} else {
933+
framework.Logf("Recent logs from CCM pod %s/%s:", pod.Namespace, pod.Name)
934+
framework.Logf("%s", string(logs))
935+
}
936+
}
937+
}
938+
}
939+
940+
func gatherServiceStatus(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
941+
framework.Logf("=== Service Status ===")
942+
currentSvc, err := cs.CoreV1().Services(namespace).Get(ctx, resourceName, metav1.GetOptions{})
943+
if err != nil {
944+
framework.Logf("Error getting current service status: %v", err)
945+
} else {
946+
framework.Logf("Service %s status:", currentSvc.Name)
947+
framework.Logf(" Annotations: %+v", currentSvc.Annotations)
948+
framework.Logf(" LoadBalancer status: %+v", currentSvc.Status.LoadBalancer)
949+
framework.Logf(" Conditions: %+v", currentSvc.Status.Conditions)
950+
}
951+
}
952+
953+
func gatherEventosOnFailure(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
954+
gatherResourceEvents(ctx, cs, namespace, resourceName)
955+
gatherAllEvents(ctx, cs, namespace, resourceName)
956+
gatherControllerLogs(ctx, cs, namespace, resourceName)
957+
gatherServiceStatus(ctx, cs, namespace, resourceName)
958+
}

0 commit comments

Comments
 (0)