Skip to content

Commit 1c1fb95

Browse files
authored
Merge pull request #212 from JackZxj/refactor/fix-deletion-block
fix: deletion was blocked when other clusters are not ready
2 parents c57d144 + 39aac90 commit 1c1fb95

File tree

5 files changed

+137
-25
lines changed

5 files changed

+137
-25
lines changed

pkg/apis/core/v1alpha1/types_federatedobject.go

+1
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ type PropagationStatusType string
219219
const (
220220
ClusterPropagationOK PropagationStatusType = "OK"
221221
WaitingForRemoval PropagationStatusType = "WaitingForRemoval"
222+
PendingCreate PropagationStatusType = "PendingCreate"
222223

223224
// Cluster-specific errors
224225

pkg/controllers/automigration/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi
269269
qualifiedName.Namespace,
270270
qualifiedName.Name,
271271
)
272-
if err != nil {
272+
if err != nil && !apierrors.IsNotFound(err) {
273273
keyedLogger.Error(err, "Failed to get federated object from store")
274274
return worker.StatusError
275275
}

pkg/controllers/federatedcluster/clusterstatus.go

+3-5
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,9 @@ func (c *FederatedClusterController) collectIndividualClusterStatus(
148148
if isReadyStatusChanged(oldClusterStatus, readyStatus) {
149149
switch readyStatus {
150150
case corev1.ConditionTrue:
151-
c.eventRecorder.Eventf(cluster, readyReason, readyMessage, "Cluster is ready")
152-
case corev1.ConditionFalse:
153-
c.eventRecorder.Eventf(cluster, readyReason, readyMessage, "Cluster is not ready")
154-
case corev1.ConditionUnknown:
155-
c.eventRecorder.Eventf(cluster, readyReason, readyMessage, "Cluster ready state is unknown")
151+
c.eventRecorder.Eventf(cluster, corev1.EventTypeNormal, readyReason, readyMessage)
152+
case corev1.ConditionFalse, corev1.ConditionUnknown:
153+
c.eventRecorder.Eventf(cluster, corev1.EventTypeWarning, readyReason, readyMessage)
156154
}
157155
}
158156

pkg/controllers/statusaggregator/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ func (a *StatusAggregator) reconcile(ctx context.Context, key reconcileKey) (sta
324324
key.namespace,
325325
federatedName,
326326
)
327-
if err != nil {
327+
if err != nil && !apierrors.IsNotFound(err) {
328328
logger.Error(err, "Failed to get object from store")
329329
return worker.StatusError
330330
}

pkg/controllers/sync/controller.go

+131-18
Original file line numberDiff line numberDiff line change
@@ -415,13 +415,29 @@ func (s *SyncController) reconcile(ctx context.Context, federatedName common.Qua
415415
fedResource.RecordError("EnsureFinalizerError", errors.Wrap(err, "Failed to ensure finalizer"))
416416
return worker.StatusError
417417
}
418-
419-
return s.syncToClusters(ctx, fedResource)
418+
clustersToSync, selectedClusters, err := s.prepareToSync(ctx, fedResource)
419+
if err != nil {
420+
fedResource.RecordError("PrepareToSyncError", errors.Wrap(err, "Failed to prepare to sync"))
421+
return worker.StatusError
422+
}
423+
return s.syncToClusters(ctx, fedResource, clustersToSync, selectedClusters)
420424
}
421425

422-
// syncToClusters ensures that the state of the given object is
423-
// synchronized to member clusters.
424-
func (s *SyncController) syncToClusters(ctx context.Context, fedResource FederatedResource) worker.Result {
426+
// prepareToSync performs the following preprocessing steps required to sync federated objects to selected member clusters:
427+
// 1. Compute the list of selected member clusters from the placement field.
428+
// 2. Compute the list of member clusters that requires an operation to be dispatched.
429+
// 3. For newly selected clusters, update the PropagationStatus for these clusters to PendingCreate.
430+
//
431+
// The PendingCreate status allows us to safely skip checking of clusters during object deletion when PropagationStatus is
432+
// empty. If not, it might be that the object was created but we failed to update the federated object's status previously.
433+
func (s *SyncController) prepareToSync(
434+
ctx context.Context,
435+
fedResource FederatedResource,
436+
) (
437+
requireSync []*fedcorev1a1.FederatedCluster,
438+
selectedClusters sets.Set[string],
439+
err error,
440+
) {
425441
keyedLogger := klog.FromContext(ctx)
426442

427443
clusters, err := s.fedInformerManager.GetJoinedClusters()
@@ -430,10 +446,81 @@ func (s *SyncController) syncToClusters(ctx context.Context, fedResource Federat
430446
string(fedcorev1a1.ClusterRetrievalFailed),
431447
errors.Wrap(err, "Failed to retrieve list of clusters"),
432448
)
433-
return s.setFederatedStatus(ctx, fedResource, fedcorev1a1.ClusterRetrievalFailed, nil)
449+
result := s.setFederatedStatus(ctx, fedResource, fedcorev1a1.ClusterRetrievalFailed, nil)
450+
if result != worker.StatusAllOK {
451+
keyedLogger.Error(nil, "Failed to set federated status", "result", result.String())
452+
}
453+
return nil, nil, err
454+
}
455+
clusterMap := make(map[string]*fedcorev1a1.FederatedCluster, len(clusters))
456+
for _, cluster := range clusters {
457+
clusterMap[cluster.Name] = cluster
434458
}
435459

436460
selectedClusterNames := fedResource.ComputePlacement(clusters)
461+
pendingCreateClusters := selectedClusterNames.Clone()
462+
status := fedResource.Object().GetStatus()
463+
for _, cluster := range status.Clusters {
464+
pendingCreateClusters.Delete(cluster.Cluster)
465+
if cluster, exist := clusterMap[cluster.Cluster]; exist {
466+
requireSync = append(requireSync, cluster)
467+
}
468+
}
469+
470+
if pendingCreateClusters.Len() == 0 {
471+
return requireSync, selectedClusterNames, nil
472+
}
473+
for cluster := range pendingCreateClusters {
474+
if cluster, exist := clusterMap[cluster]; exist && cluster.GetDeletionTimestamp().IsZero() {
475+
status.Clusters = append(status.Clusters, fedcorev1a1.PropagationStatus{
476+
Cluster: cluster.Name,
477+
Status: fedcorev1a1.PendingCreate,
478+
})
479+
requireSync = append(requireSync, cluster)
480+
}
481+
}
482+
483+
keyedLogger.V(1).Info("Update clusters pending object creation",
484+
"clusters", strings.Join(sets.List(pendingCreateClusters), ","))
485+
obj := fedResource.Object()
486+
objNamespace := obj.GetNamespace()
487+
objName := obj.GetName()
488+
// If the underlying resource has changed, attempt to retrieve and
489+
// update it repeatedly.
490+
err = wait.PollImmediateWithContext(ctx, 1*time.Second, 5*time.Second, func(ctx context.Context) (bool, error) {
491+
var err error
492+
obj.GetStatus().Clusters = status.Clusters
493+
obj, err = fedobjectadapters.UpdateStatus(ctx, s.fedClient.CoreV1alpha1(), obj, metav1.UpdateOptions{})
494+
if err == nil {
495+
fedResource.SetObject(obj)
496+
return true, nil
497+
}
498+
if apierrors.IsConflict(err) {
499+
obj, err = fedobjectadapters.Get(ctx, s.fedClient.CoreV1alpha1(), objNamespace, objName, metav1.GetOptions{})
500+
if err != nil {
501+
return false, errors.Wrapf(err, "failed to retrieve resource")
502+
}
503+
return false, nil
504+
}
505+
return false, errors.Wrapf(err, "failed to update resource")
506+
})
507+
if err != nil {
508+
keyedLogger.Error(err, "Failed to set propagation status")
509+
return nil, nil, err
510+
}
511+
return requireSync, selectedClusterNames, nil
512+
}
513+
514+
// syncToClusters ensures that the state of the given object is
515+
// synchronized to member clusters.
516+
func (s *SyncController) syncToClusters(
517+
ctx context.Context,
518+
fedResource FederatedResource,
519+
clusters []*fedcorev1a1.FederatedCluster,
520+
selectedClusterNames sets.Set[string],
521+
) worker.Result {
522+
keyedLogger := klog.FromContext(ctx)
523+
var err error
437524
keyedLogger.V(2).Info("Ensuring target object in clusters", "clusters", strings.Join(sets.List(selectedClusterNames), ","))
438525

439526
skipAdoptingPreexistingResources := !adoption.ShouldAdoptPreexistingResources(fedResource.Object())
@@ -693,9 +780,7 @@ func (s *SyncController) ensureRemovalFromClusters(ctx context.Context, fedResou
693780
remainingClusters := []string{}
694781
ok, err := s.handleDeletionInClusters(
695782
ctx,
696-
fedResource.TargetGVK(),
697-
fedResource.TargetGVR(),
698-
fedResource.TargetName(),
783+
fedResource,
699784
func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured) {
700785
remainingClusters = append(remainingClusters, clusterName)
701786
s.removeFromCluster(ctx, dispatcher, clusterName, fedResource, clusterObj, true)
@@ -727,18 +812,20 @@ func (s *SyncController) ensureRemovalFromClusters(ctx context.Context, fedResou
727812
// the informer to cover the possibility that the resources have not
728813
// yet been cached.
729814
func (s *SyncController) checkObjectRemovedFromAllClusters(ctx context.Context, fedResource FederatedResource) error {
730-
clusters, err := s.fedInformerManager.GetJoinedClusters()
815+
keyedLogger := klog.FromContext(ctx)
816+
syncedClusters, syncedClusterNames, err := s.getSyncedClusters(fedResource)
731817
if err != nil {
732-
return errors.Wrap(err, "failed to get a list of clusters")
818+
return err
733819
}
734820

821+
keyedLogger.V(4).Info("Check object removed from clusters", "clusters", strings.Join(syncedClusterNames, ","))
735822
dispatcher := dispatch.NewCheckUnmanagedDispatcher(
736823
s.getClusterClient,
737824
fedResource.TargetGVR(),
738825
fedResource.TargetName(),
739826
)
740827
unreadyClusters := []string{}
741-
for _, cluster := range clusters {
828+
for _, cluster := range syncedClusters {
742829
if !clusterutil.IsClusterReady(&cluster.Status) {
743830
unreadyClusters = append(unreadyClusters, cluster.Name)
744831
continue
@@ -762,22 +849,24 @@ func (s *SyncController) checkObjectRemovedFromAllClusters(ctx context.Context,
762849
// each managed resource in member clusters.
763850
func (s *SyncController) handleDeletionInClusters(
764851
ctx context.Context,
765-
targetGVK schema.GroupVersionKind,
766-
targetGVR schema.GroupVersionResource,
767-
targetQualifiedName common.QualifiedName,
852+
fedResource FederatedResource,
768853
deletionFunc func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured),
769854
) (bool, error) {
770855
keyedLogger := klog.FromContext(ctx)
856+
targetGVK := fedResource.TargetGVK()
857+
targetGVR := fedResource.TargetGVR()
858+
targetQualifiedName := fedResource.TargetName()
771859

772-
clusters, err := s.fedInformerManager.GetJoinedClusters()
860+
syncedClusters, syncedClusterNames, err := s.getSyncedClusters(fedResource)
773861
if err != nil {
774-
return false, fmt.Errorf("failed to get a list of clusters: %w", err)
862+
return false, err
775863
}
776864

865+
keyedLogger.V(4).Info("Handle deletion in clusters", "clusters", strings.Join(syncedClusterNames, ","))
777866
dispatcher := dispatch.NewUnmanagedDispatcher(s.getClusterClient, targetGVR, targetQualifiedName)
778867
retrievalFailureClusters := []string{}
779868
unreadyClusters := []string{}
780-
for _, cluster := range clusters {
869+
for _, cluster := range syncedClusters {
781870
clusterName := cluster.Name
782871

783872
if !clusterutil.IsClusterReady(&cluster.Status) {
@@ -821,6 +910,30 @@ func (s *SyncController) handleDeletionInClusters(
821910
return ok, nil
822911
}
823912

913+
func (s *SyncController) getSyncedClusters(
914+
fedResource FederatedResource,
915+
) ([]*fedcorev1a1.FederatedCluster, []string, error) {
916+
clusters, err := s.fedInformerManager.GetJoinedClusters()
917+
if err != nil {
918+
return nil, nil, fmt.Errorf("failed to get the list of joined clusters: %w", err)
919+
}
920+
clusterMap := make(map[string]*fedcorev1a1.FederatedCluster, len(clusters))
921+
for _, cluster := range clusters {
922+
clusterMap[cluster.Name] = cluster
923+
}
924+
925+
status := fedResource.Object().GetStatus()
926+
syncedClusters := make([]*fedcorev1a1.FederatedCluster, 0, len(status.Clusters))
927+
syncedClusterNames := make([]string, 0, len(status.Clusters))
928+
for _, cluster := range status.Clusters {
929+
if cluster, exists := clusterMap[cluster.Cluster]; exists {
930+
syncedClusters = append(syncedClusters, cluster)
931+
syncedClusterNames = append(syncedClusterNames, cluster.Name)
932+
}
933+
}
934+
return syncedClusters, syncedClusterNames, nil
935+
}
936+
824937
func (s *SyncController) ensureFinalizer(ctx context.Context, fedResource FederatedResource) error {
825938
ctx, keyedLogger := logging.InjectLoggerValues(ctx, "finalizer-name", FinalizerSyncController)
826939

0 commit comments

Comments
 (0)