Skip to content

Commit 6c64a4d

Browse files
committed
make E2E tests more robust for CI
1 parent dc10916 commit 6c64a4d

File tree

1 file changed

+68
-24
lines changed

1 file changed

+68
-24
lines changed

operator/e2e/tests/cert_management_test.go

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,10 @@ func Test_CM1_CertManagementRoundTrip(t *testing.T) {
171171
t.Fatalf("Failed to apply Certificate: %v", err)
172172
}
173173

174-
// Wait for Secret to be created by Cert-Manager
175-
waitForSecret(t, ctx, clientset, "grove-webhook-server-cert", true)
174+
// Wait for cert-manager to actually take over the secret.
175+
// This is critical because the secret may already exist from auto-provision mode,
176+
// and we need to wait for cert-manager to update it (not just check existence).
177+
waitForSecretManagedByCertManager(t, ctx, clientset, "grove-webhook-server-cert")
176178

177179
logger.Info("3. Upgrade Grove to use cert-manager (autoProvision=false)")
178180
upgradeGroveToCertManager(t, ctx, restConfig)
@@ -263,6 +265,32 @@ func waitForSecret(t *testing.T, ctx context.Context, clientset *kubernetes.Clie
263265
}
264266
}
265267

268+
// waitForSecretManagedByCertManager waits for a secret to exist AND be managed by cert-manager.
269+
// This is important because when transitioning from auto-provision to cert-manager mode,
270+
// the secret may already exist from auto-provision, and we need to wait for cert-manager
271+
// to actually update it (which is indicated by the cert-manager.io/certificate-name annotation).
272+
func waitForSecretManagedByCertManager(t *testing.T, ctx context.Context, clientset *kubernetes.Clientset, name string) {
273+
t.Helper()
274+
275+
logger.Debugf("Waiting for secret %s to be managed by cert-manager...", name)
276+
err := utils.PollForCondition(ctx, defaultPollTimeout, defaultPollInterval, func() (bool, error) {
277+
secret, err := clientset.CoreV1().Secrets("grove-system").Get(ctx, name, metav1.GetOptions{})
278+
if err != nil {
279+
return false, nil // Secret doesn't exist yet, keep waiting
280+
}
281+
// Check if cert-manager has taken ownership of this secret
282+
certName := secret.Annotations[certManagerCertNameAnnotation]
283+
if certName != "" {
284+
logger.Debugf("Secret %s is now managed by cert-manager (certificate: %s)", name, certName)
285+
return true, nil
286+
}
287+
return false, nil
288+
})
289+
if err != nil {
290+
t.Fatalf("Timeout waiting for secret %s to be managed by cert-manager: %v", name, err)
291+
}
292+
}
293+
266294
func deleteCertManagerResources(ctx context.Context, clientset *kubernetes.Clientset, dynamicClient dynamic.Interface) {
267295
certGVR := schema.GroupVersionResource{Group: "cert-manager.io", Version: "v1", Resource: "certificates"}
268296
issuerGVR := schema.GroupVersionResource{Group: "cert-manager.io", Version: "v1", Resource: "clusterissuers"}
@@ -511,36 +539,52 @@ func verifyWebhookSecretCertManagerStatus(ctx context.Context, clientset *kubern
511539

512540
// verifyWebhookServingCertificate verifies that the webhook is actually serving the certificate from the Secret.
513541
// This connects to the webhook endpoint via TLS and compares the served certificate with the one in the Secret.
542+
// It includes retry logic to handle timing issues with:
543+
// - Kubernetes secret volume propagation delays
544+
// - The certwatcher's 10-second polling interval for detecting certificate changes
514545
func verifyWebhookServingCertificate(t *testing.T, ctx context.Context, clientset *kubernetes.Clientset, restConfig *rest.Config) {
515546
t.Helper()
516547

517-
logger.Debug("Verifying webhook is serving the correct certificate...")
548+
logger.Debug("Verifying webhook is serving the correct certificate (with retries for cert reload timing)...")
518549

519-
// Get the certificate from the Secret
520-
expectedCert, err := getCertificateFromSecret(ctx, clientset)
521-
if err != nil {
522-
t.Fatalf("Failed to get certificate from secret: %v", err)
523-
}
524-
logger.Debugf("Expected certificate serial number: %s", expectedCert.SerialNumber.String())
550+
// Retry for up to 30 seconds to account for:
551+
// - Kubernetes secret volume update propagation (can take up to the kubelet sync period)
552+
// - certwatcher 10-second polling interval
553+
var lastExpectedSerial, lastServedSerial string
554+
err := utils.PollForCondition(ctx, 30*time.Second, 2*time.Second, func() (bool, error) {
555+
// Get the certificate from the Secret
556+
expectedCert, err := getCertificateFromSecret(ctx, clientset)
557+
if err != nil {
558+
logger.Debugf("Failed to get certificate from secret: %v", err)
559+
return false, nil
560+
}
561+
lastExpectedSerial = expectedCert.SerialNumber.String()
525562

526-
// Get the certificate the webhook is actually serving
527-
servedCert, err := getServedCertificate(ctx, clientset, restConfig)
528-
if err != nil {
529-
t.Fatalf("Failed to get served certificate from webhook: %v", err)
530-
}
531-
logger.Debugf("Served certificate serial number: %s", servedCert.SerialNumber.String())
563+
// Get the certificate the webhook is actually serving
564+
servedCert, err := getServedCertificate(ctx, clientset, restConfig)
565+
if err != nil {
566+
logger.Debugf("Failed to get served certificate from webhook: %v", err)
567+
return false, nil
568+
}
569+
lastServedSerial = servedCert.SerialNumber.String()
570+
571+
// Compare the certificates
572+
if certificatesMatch(expectedCert, servedCert) {
573+
logger.Debugf("Certificate match! Serial: %s", lastExpectedSerial)
574+
return true, nil
575+
}
532576

533-
// Compare the certificates
534-
if !certificatesMatch(expectedCert, servedCert) {
535-
t.Fatalf("Certificate mismatch: webhook is not serving the expected certificate.\n"+
577+
logger.Debugf("Certificate mismatch (will retry): expected serial=%s, served serial=%s",
578+
lastExpectedSerial, lastServedSerial)
579+
return false, nil
580+
})
581+
582+
if err != nil {
583+
t.Fatalf("Certificate mismatch: webhook is not serving the expected certificate after retries.\n"+
536584
"Expected serial: %s\n"+
537585
"Served serial: %s\n"+
538-
"Expected subject: %s\n"+
539-
"Served subject: %s",
540-
expectedCert.SerialNumber.String(),
541-
servedCert.SerialNumber.String(),
542-
expectedCert.Subject.String(),
543-
servedCert.Subject.String())
586+
"This may indicate the operator has not reloaded the certificate from the secret.",
587+
lastExpectedSerial, lastServedSerial)
544588
}
545589

546590
logger.Debug("Verified: webhook is serving the correct certificate from the Secret")

0 commit comments

Comments
 (0)