|
| 1 | +// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +package controlplane |
| 6 | + |
| 7 | +import ( |
| 8 | + "context" |
| 9 | + "fmt" |
| 10 | + |
| 11 | + extensionsconfigv1alpha1 "github.com/gardener/gardener/extensions/pkg/apis/config/v1alpha1" |
| 12 | + extensionscontroller "github.com/gardener/gardener/extensions/pkg/controller" |
| 13 | + "github.com/gardener/gardener/extensions/pkg/controller/controlplane" |
| 14 | + "github.com/gardener/gardener/extensions/pkg/util" |
| 15 | + extensionsv1alpha1 "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1" |
| 16 | + "github.com/go-logr/logr" |
| 17 | + corev1 "k8s.io/api/core/v1" |
| 18 | + "k8s.io/client-go/util/retry" |
| 19 | + "sigs.k8s.io/controller-runtime/pkg/client" |
| 20 | + "sigs.k8s.io/controller-runtime/pkg/manager" |
| 21 | + |
| 22 | + networking "github.com/gardener/gardener-extension-provider-aws/pkg/utils/networking" |
| 23 | +) |
| 24 | + |
const (
	// NetworkUnavailableConditionType is the node condition type inspected by
	// the cleanup logic in this package.
	NetworkUnavailableConditionType = "NetworkUnavailable"
	// CalicoIsUpReason is the condition reason Calico writes on the
	// NetworkUnavailable condition to signal that Calico is up.
	CalicoIsUpReason = "CalicoIsUp"
	// CalicoIsDownReason is the condition reason Calico writes on the
	// NetworkUnavailable condition to signal that Calico is down.
	CalicoIsDownReason = "CalicoIsDown"
	// AnnotationCalicoCleanupCompleted is the ControlPlane annotation key that
	// records that the Calico condition cleanup has already been performed.
	AnnotationCalicoCleanupCompleted = "aws.provider.extensions.gardener.cloud/calico-cleanup-completed"
)
| 35 | + |
| 36 | +// NewActuator creates a new Actuator that wraps the generic actuator and adds cleanup logic. |
| 37 | +func NewActuator(mgr manager.Manager, a controlplane.Actuator) controlplane.Actuator { |
| 38 | + return &actuator{ |
| 39 | + Actuator: a, |
| 40 | + client: mgr.GetClient(), |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +// actuator is an Actuator that acts upon and updates the status of ControlPlane resources. |
| 45 | +type actuator struct { |
| 46 | + controlplane.Actuator |
| 47 | + client client.Client |
| 48 | +} |
| 49 | + |
| 50 | +func (a *actuator) Reconcile( |
| 51 | + ctx context.Context, |
| 52 | + log logr.Logger, |
| 53 | + cp *extensionsv1alpha1.ControlPlane, |
| 54 | + cluster *extensionscontroller.Cluster, |
| 55 | +) (bool, error) { |
| 56 | + // Call Reconcile on the composed Actuator |
| 57 | + ok, err := a.Actuator.Reconcile(ctx, log, cp, cluster) |
| 58 | + if err != nil { |
| 59 | + return ok, err |
| 60 | + } |
| 61 | + |
| 62 | + // Only clean up NetworkUnavailable conditions if overlay is disabled |
| 63 | + overlayEnabled, err := networking.IsOverlayEnabled(cluster.Shoot.Spec.Networking) |
| 64 | + if err != nil { |
| 65 | + log.Error(err, "Failed to determine if overlay is enabled") |
| 66 | + return ok, err |
| 67 | + } |
| 68 | + |
| 69 | + // Clean up NetworkUnavailable conditions set by Calico only when overlay is disabled |
| 70 | + // Only run cleanup if it hasn't been completed yet (annotation not present) |
| 71 | + if !overlayEnabled && cp.Annotations[AnnotationCalicoCleanupCompleted] != "true" { |
| 72 | + if err := a.cleanupCalicoNetworkUnavailableConditions(ctx, log, cp.Namespace, cluster); err != nil { |
| 73 | + log.Error(err, "Failed to cleanup Calico NetworkUnavailable conditions") |
| 74 | + return ok, err |
| 75 | + } else { |
| 76 | + // Mark cleanup as completed |
| 77 | + if err := a.markCleanupCompleted(ctx, cp); err != nil { |
| 78 | + log.Error(err, "Failed to mark cleanup as completed") |
| 79 | + return ok, err |
| 80 | + } |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + // Remove cleanup annotation when overlay is enabled so cleanup can run again if overlay is disabled later |
| 85 | + if overlayEnabled && cp.Annotations[AnnotationCalicoCleanupCompleted] == "true" { |
| 86 | + if err := a.removeCleanupAnnotation(ctx, cp); err != nil { |
| 87 | + log.Error(err, "Failed to remove cleanup annotation") |
| 88 | + return ok, err |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + return ok, nil |
| 93 | +} |
| 94 | + |
| 95 | +// cleanupCalicoNetworkUnavailableConditions removes NetworkUnavailable conditions from nodes |
| 96 | +// that were set by Calico for example "CalicoIsUp" or "CalicoIsDown". |
| 97 | +func (a *actuator) cleanupCalicoNetworkUnavailableConditions( |
| 98 | + ctx context.Context, |
| 99 | + log logr.Logger, |
| 100 | + namespace string, |
| 101 | + cluster *extensionscontroller.Cluster, |
| 102 | +) error { |
| 103 | + if extensionscontroller.IsHibernated(cluster) { |
| 104 | + return nil |
| 105 | + } |
| 106 | + |
| 107 | + _, shootClient, err := util.NewClientForShoot(ctx, a.client, namespace, client.Options{}, extensionsconfigv1alpha1.RESTOptions{}) |
| 108 | + if err != nil { |
| 109 | + return fmt.Errorf("could not create shoot client: %w", err) |
| 110 | + } |
| 111 | + |
| 112 | + nodes := &corev1.NodeList{} |
| 113 | + if err := shootClient.List(ctx, nodes); err != nil { |
| 114 | + return fmt.Errorf("could not list nodes in shoot cluster: %w", err) |
| 115 | + } |
| 116 | + |
| 117 | + for _, node := range nodes.Items { |
| 118 | + if err := a.cleanupNodeNetworkUnavailableCondition(ctx, log, shootClient, &node); err != nil { |
| 119 | + log.Error(err, "Failed to cleanup NetworkUnavailable condition from node", "node", node.Name) |
| 120 | + return err |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + return nil |
| 125 | +} |
| 126 | + |
| 127 | +// cleanupNodeNetworkUnavailableCondition removes the NetworkUnavailable condition from a node |
| 128 | +// if it was set by Calico. |
| 129 | +func (a *actuator) cleanupNodeNetworkUnavailableCondition( |
| 130 | + ctx context.Context, |
| 131 | + log logr.Logger, |
| 132 | + shootClient client.Client, |
| 133 | + node *corev1.Node, |
| 134 | +) error { |
| 135 | + // Check if the node has a NetworkUnavailable condition set by Calico |
| 136 | + hasCondition := false |
| 137 | + for _, condition := range node.Status.Conditions { |
| 138 | + if condition.Type == NetworkUnavailableConditionType && |
| 139 | + (condition.Reason == CalicoIsUpReason || condition.Reason == CalicoIsDownReason) { |
| 140 | + hasCondition = true |
| 141 | + break |
| 142 | + } |
| 143 | + } |
| 144 | + |
| 145 | + if !hasCondition { |
| 146 | + return nil |
| 147 | + } |
| 148 | + |
| 149 | + // Remove the NetworkUnavailable condition |
| 150 | + return retry.RetryOnConflict(retry.DefaultRetry, func() error { |
| 151 | + // Get the latest version of the node |
| 152 | + currentNode := &corev1.Node{} |
| 153 | + if err := shootClient.Get(ctx, client.ObjectKey{Name: node.Name}, currentNode); err != nil { |
| 154 | + return err |
| 155 | + } |
| 156 | + |
| 157 | + // Filter out the NetworkUnavailable condition set by Calico |
| 158 | + var newConditions []corev1.NodeCondition |
| 159 | + removed := false |
| 160 | + for _, condition := range currentNode.Status.Conditions { |
| 161 | + if condition.Type == NetworkUnavailableConditionType && |
| 162 | + (condition.Reason == CalicoIsUpReason || condition.Reason == CalicoIsDownReason) { |
| 163 | + removed = true |
| 164 | + log.Info("Removing NetworkUnavailable condition set by Calico", "node", currentNode.Name, "reason", condition.Reason) |
| 165 | + continue |
| 166 | + } |
| 167 | + newConditions = append(newConditions, condition) |
| 168 | + } |
| 169 | + |
| 170 | + // Only update if we actually removed a condition |
| 171 | + if !removed { |
| 172 | + return nil |
| 173 | + } |
| 174 | + |
| 175 | + currentNode.Status.Conditions = newConditions |
| 176 | + return shootClient.Status().Update(ctx, currentNode) |
| 177 | + }) |
| 178 | +} |
| 179 | + |
| 180 | +// markCleanupCompleted marks the cleanup as completed by adding an annotation to the ControlPlane resource. |
| 181 | +func (a *actuator) markCleanupCompleted(ctx context.Context, cp *extensionsv1alpha1.ControlPlane) error { |
| 182 | + patch := client.MergeFrom(cp.DeepCopy()) |
| 183 | + if cp.Annotations == nil { |
| 184 | + cp.Annotations = make(map[string]string) |
| 185 | + } |
| 186 | + cp.Annotations[AnnotationCalicoCleanupCompleted] = "true" |
| 187 | + return a.client.Patch(ctx, cp, patch) |
| 188 | +} |
| 189 | + |
| 190 | +// removeCleanupAnnotation removes the cleanup completion annotation from the ControlPlane resource. |
| 191 | +func (a *actuator) removeCleanupAnnotation(ctx context.Context, cp *extensionsv1alpha1.ControlPlane) error { |
| 192 | + patch := client.MergeFrom(cp.DeepCopy()) |
| 193 | + delete(cp.Annotations, AnnotationCalicoCleanupCompleted) |
| 194 | + return a.client.Patch(ctx, cp, patch) |
| 195 | +} |
0 commit comments