Skip to content

Commit 62f800b

Browse files
authored
Fix webhook admission control deadlock during installation (#2179)
This fixes a chicken-and-egg bootstrap issue where the operator would get stuck during KnativeServing installation.

Problem:
- ValidatingWebhookConfiguration with failurePolicy=Fail intercepts Certificate resource creation
- If Certificate resources are created before the webhook pod is ready, the API server rejects them
- The activator deployment depends on the routing-serving-certs secret (generated from a Certificate resource) at runtime
- Previous stage ordering would check all deployments (including activator) before creating Certificate resources, causing a deadlock

Solution:
1. Added CheckWebhookDeployment() function that waits specifically for the webhook deployment to be ready before proceeding
2. Reordered reconciliation stages:
   - manifests.Install (creates all deployments)
   - CheckWebhookDeployment (waits for webhook to be ready)
   - InstallWebhookDependentResources (creates Certificate resources)
   - CheckDeployments (checks all deployments including activator)

This ensures:
- Webhook is ready before Certificate creation (avoids admission rejection)
- Certificate resources exist before checking activator (avoids missing secret)
- Clear error message if webhook deployment is missing from manifest

Related functions:
- pkg/reconciler/common/deployments.go: Added CheckWebhookDeployment()
- pkg/reconciler/knativeserving/knativeserving.go: Reordered stages
- pkg/reconciler/common/install.go: Added logging for consistency
1 parent 0423300 commit 62f800b

File tree

3 files changed

+43
-2
lines changed

3 files changed

+43
-2
lines changed

pkg/reconciler/common/deployments.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package common
1919
import (
2020
"context"
2121
"errors"
22+
"fmt"
2223

2324
mf "github.com/manifestival/manifestival"
2425
appsv1 "k8s.io/api/apps/v1"
@@ -27,6 +28,7 @@ import (
2728
"k8s.io/client-go/kubernetes/scheme"
2829

2930
"knative.dev/operator/pkg/apis/operator/base"
31+
"knative.dev/pkg/logging"
3032
)
3133

3234
type deploymentsNotReadyError struct{}
@@ -43,6 +45,43 @@ func IsDeploymentsNotReadyError(err error) bool {
4345
return errors.Is(err, deploymentsNotReadyError{})
4446
}
4547

48+
// CheckWebhookDeployment checks if the webhook deployment is ready.
49+
// This is needed before creating webhook-dependent resources (e.g., Certificates)
50+
// because the ValidatingWebhookConfiguration with failurePolicy=Fail will reject
51+
// Certificate creation if the webhook pod is not ready.
52+
func CheckWebhookDeployment(ctx context.Context, manifest *mf.Manifest, instance base.KComponent) error {
53+
logger := logging.FromContext(ctx)
54+
logger.Debug("Checking webhook deployment")
55+
status := instance.GetStatus()
56+
webhookDeployment := manifest.Filter(mf.ByKind("Deployment"), mf.ByName("webhook"))
57+
58+
if len(webhookDeployment.Resources()) == 0 {
59+
status.MarkInstallFailed("webhook deployment not found in manifest")
60+
return fmt.Errorf("webhook deployment not found in manifest")
61+
}
62+
63+
for _, u := range webhookDeployment.Resources() {
64+
resource, err := manifest.Client.Get(&u)
65+
if err != nil {
66+
status.MarkDeploymentsNotReady([]string{"webhook"})
67+
if apierrors.IsNotFound(err) {
68+
return deploymentsNotReadyError{}
69+
}
70+
return err
71+
}
72+
deployment := &appsv1.Deployment{}
73+
if err := scheme.Scheme.Convert(resource, deployment, nil); err != nil {
74+
return err
75+
}
76+
if !isDeploymentAvailable(deployment) {
77+
status.MarkDeploymentsNotReady([]string{"webhook"})
78+
return deploymentsNotReadyError{}
79+
}
80+
}
81+
82+
return nil
83+
}
84+
4685
// CheckDeployments checks all deployments in the given manifest and updates the given
4786
// status with the status of the deployments.
4887
func CheckDeployments(ctx context.Context, manifest *mf.Manifest, instance base.KComponent) error {

pkg/reconciler/common/install.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func InstallWebhookConfigs(ctx context.Context, manifest *mf.Manifest, instance
9090

9191
// InstallWebhookDependentResources applies the webhook-dependent resources and updates the given status accordingly.
9292
func InstallWebhookDependentResources(ctx context.Context, manifest *mf.Manifest, instance base.KComponent) error {
93+
logging.FromContext(ctx).Debug("Installing webhook dependent resources")
9394
status := instance.GetStatus()
9495
if err := manifest.Filter(webhookDependentResources).Apply(); err != nil {
9596
status.MarkInstallFailed(err.Error())

pkg/reconciler/knativeserving/knativeserving.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,10 @@ func (r *Reconciler) ReconcileKind(ctx context.Context, ks *v1beta1.KnativeServi
124124
r.appendExtensionManifests,
125125
r.transform,
126126
manifests.Install,
127-
manifests.SetManifestPaths, // setting path right after applying manifests to populate paths
128-
common.CheckDeployments,
127+
manifests.SetManifestPaths, // setting path right after applying manifests to populate paths
128+
common.CheckWebhookDeployment, // Wait for webhook to be ready before creating Certificate resources
129129
common.InstallWebhookDependentResources,
130+
common.CheckDeployments,
130131
common.MarkStatusSuccess,
131132
common.DeleteObsoleteResources(ctx, ks, r.installed),
132133
}

0 commit comments

Comments
 (0)