Skip to content

Commit 95cd0a1

Browse files
authored
Merge pull request #9031 from Lyndon-Li/vgdp-ms-cancel-pvb-pvr
Cancel pvb/pvr on velero server restart
2 parents 9f9c3e8 + 18f8172 commit 95cd0a1

7 files changed

Lines changed: 101 additions & 10 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix issue #8961, cancel PVB/PVR on Velero server restart

pkg/cmd/server/server.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ func markInProgressBackupsFailed(ctx context.Context, client ctrlclient.Client,
961961
}
962962
log.WithField("backup", backup.GetName()).Warn(updated.Status.FailureReason)
963963
markDataUploadsCancel(ctx, client, backup, log)
964+
markPodVolumeBackupsCancel(ctx, client, backup, log)
964965
}
965966
}
966967

@@ -983,8 +984,10 @@ func markInProgressRestoresFailed(ctx context.Context, client ctrlclient.Client,
983984
log.WithError(errors.WithStack(err)).Errorf("failed to patch restore %q", restore.GetName())
984985
continue
985986
}
987+
986988
log.WithField("restore", restore.GetName()).Warn(updated.Status.FailureReason)
987989
markDataDownloadsCancel(ctx, client, restore, log)
990+
markPodVolumeRestoresCancel(ctx, client, restore, log)
988991
}
989992
}
990993

@@ -1069,3 +1072,90 @@ func markDataDownloadsCancel(ctx context.Context, client ctrlclient.Client, rest
10691072
}
10701073
}
10711074
}
1075+
1076+
func markPodVolumeBackupsCancel(ctx context.Context, client ctrlclient.Client, backup velerov1api.Backup, log logrus.FieldLogger) {
1077+
pvbs := &velerov1api.PodVolumeBackupList{}
1078+
1079+
if err := client.List(ctx, pvbs, &ctrlclient.ListOptions{
1080+
Namespace: backup.GetNamespace(),
1081+
LabelSelector: labels.Set(map[string]string{
1082+
velerov1api.BackupUIDLabel: string(backup.GetUID()),
1083+
}).AsSelector(),
1084+
}); err != nil {
1085+
log.WithError(errors.WithStack(err)).Error("failed to list PVBs")
1086+
return
1087+
}
1088+
1089+
for i := range pvbs.Items {
1090+
pvb := pvbs.Items[i]
1091+
if pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseAccepted ||
1092+
pvb.Status.Phase == velerov1api.PodVolumeBackupPhasePrepared ||
1093+
pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseInProgress ||
1094+
pvb.Status.Phase == velerov1api.PodVolumeBackupPhaseNew ||
1095+
pvb.Status.Phase == "" {
1096+
err := controller.UpdatePVBWithRetry(ctx, client, types.NamespacedName{Namespace: pvb.Namespace, Name: pvb.Name}, log.WithField("PVB", pvb.Name),
1097+
func(pvb *velerov1api.PodVolumeBackup) bool {
1098+
if pvb.Spec.Cancel {
1099+
return false
1100+
}
1101+
1102+
pvb.Spec.Cancel = true
1103+
pvb.Status.Message = fmt.Sprintf("PVB is in status %q during the velero server starting, mark it as cancel", pvb.Status.Phase)
1104+
1105+
return true
1106+
})
1107+
1108+
if err != nil {
1109+
log.WithError(errors.WithStack(err)).Errorf("failed to mark PVB %q cancel", pvb.GetName())
1110+
continue
1111+
}
1112+
log.WithField("PVB is mark for cancel due to server restart", pvb.GetName()).Warn(pvb.Status.Message)
1113+
}
1114+
}
1115+
}
1116+
1117+
func markPodVolumeRestoresCancel(ctx context.Context, client ctrlclient.Client, restore velerov1api.Restore, log logrus.FieldLogger) {
1118+
pvrs := &velerov1api.PodVolumeRestoreList{}
1119+
1120+
if err := client.List(ctx, pvrs, &ctrlclient.ListOptions{
1121+
Namespace: restore.GetNamespace(),
1122+
LabelSelector: labels.Set(map[string]string{
1123+
velerov1api.RestoreUIDLabel: string(restore.GetUID()),
1124+
}).AsSelector(),
1125+
}); err != nil {
1126+
log.WithError(errors.WithStack(err)).Error("failed to list PVRs")
1127+
return
1128+
}
1129+
1130+
for i := range pvrs.Items {
1131+
pvr := pvrs.Items[i]
1132+
if controller.IsLegacyPVR(&pvr) {
1133+
log.WithField("PVR", pvr.GetName()).Warn("Found a legacy PVR during velero server restart, cannot stop it")
1134+
continue
1135+
}
1136+
1137+
if pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseAccepted ||
1138+
pvr.Status.Phase == velerov1api.PodVolumeRestorePhasePrepared ||
1139+
pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseInProgress ||
1140+
pvr.Status.Phase == velerov1api.PodVolumeRestorePhaseNew ||
1141+
pvr.Status.Phase == "" {
1142+
err := controller.UpdatePVRWithRetry(ctx, client, types.NamespacedName{Namespace: pvr.Namespace, Name: pvr.Name}, log.WithField("PVR", pvr.Name),
1143+
func(pvr *velerov1api.PodVolumeRestore) bool {
1144+
if pvr.Spec.Cancel {
1145+
return false
1146+
}
1147+
1148+
pvr.Spec.Cancel = true
1149+
pvr.Status.Message = fmt.Sprintf("PVR is in status %q during the velero server starting, mark it as cancel", pvr.Status.Phase)
1150+
1151+
return true
1152+
})
1153+
1154+
if err != nil {
1155+
log.WithError(errors.WithStack(err)).Errorf("failed to mark PVR %q cancel", pvr.GetName())
1156+
continue
1157+
}
1158+
log.WithField("PVR is mark for cancel due to server restart", pvr.GetName()).Warn(pvr.Status.Message)
1159+
}
1160+
}
1161+
}

pkg/controller/data_download_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,7 @@ func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, name
885885
err := client.Update(ctx, dd)
886886
if err != nil {
887887
if apierrors.IsConflict(err) {
888-
log.Warnf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
888+
log.Debugf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
889889
return false, nil
890890
} else {
891891
return false, errors.Wrapf(err, "error updating datadownload %s/%s", dd.Namespace, dd.Name)

pkg/controller/data_upload_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namesp
990990
err := client.Update(ctx, du)
991991
if err != nil {
992992
if apierrors.IsConflict(err) {
993-
log.Warnf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
993+
log.Debugf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
994994
return false, nil
995995
} else {
996996
return false, errors.Wrapf(err, "error updating dataupload with error %s/%s", du.Namespace, du.Name)

pkg/controller/pod_volume_backup_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,7 @@ func UpdatePVBWithRetry(ctx context.Context, client client.Client, namespacedNam
833833
err := client.Update(ctx, pvb)
834834
if err != nil {
835835
if apierrors.IsConflict(err) {
836-
log.Warnf("failed to update PVB for %s/%s and will retry it", pvb.Namespace, pvb.Name)
836+
log.Debugf("failed to update PVB for %s/%s and will retry it", pvb.Namespace, pvb.Name)
837837
return false, nil
838838
} else {
839839
return false, errors.Wrapf(err, "error updating PVB with error %s/%s", pvb.Namespace, pvb.Name)

pkg/controller/pod_volume_restore_controller.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ func (r *PodVolumeRestoreReconciler) closeDataPath(ctx context.Context, pvrName
545545
func (r *PodVolumeRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
546546
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
547547
pvr := object.(*velerov1api.PodVolumeRestore)
548-
if isLegacyPVR(pvr) {
548+
if IsLegacyPVR(pvr) {
549549
return false
550550
}
551551

@@ -570,7 +570,7 @@ func (r *PodVolumeRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
570570

571571
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
572572
pvr := obj.(*velerov1api.PodVolumeRestore)
573-
return !isLegacyPVR(pvr)
573+
return !IsLegacyPVR(pvr)
574574
})
575575

576576
return ctrl.NewControllerManagedBy(mgr).
@@ -620,7 +620,7 @@ func (r *PodVolumeRestoreReconciler) findPVRForTargetPod(ctx context.Context, po
620620

621621
requests := []reconcile.Request{}
622622
for _, item := range list.Items {
623-
if isLegacyPVR(&item) {
623+
if IsLegacyPVR(&item) {
624624
continue
625625
}
626626

@@ -897,7 +897,7 @@ func UpdatePVRWithRetry(ctx context.Context, client client.Client, namespacedNam
897897
err := client.Update(ctx, pvr)
898898
if err != nil {
899899
if apierrors.IsConflict(err) {
900-
log.Warnf("failed to update PVR for %s/%s and will retry it", pvr.Namespace, pvr.Name)
900+
log.Debugf("failed to update PVR for %s/%s and will retry it", pvr.Namespace, pvr.Name)
901901
return false, nil
902902
} else {
903903
return false, errors.Wrapf(err, "error updating PVR %s/%s", pvr.Namespace, pvr.Name)

pkg/controller/pod_volume_restore_controller_legacy.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ func (c *PodVolumeRestoreReconcilerLegacy) SetupWithManager(mgr ctrl.Manager) er
205205
// By watching the pods, we can trigger the PVR reconciliation again once the pod is finally scheduled on the node.
206206
pred := kube.NewAllEventPredicate(func(obj client.Object) bool {
207207
pvr := obj.(*velerov1api.PodVolumeRestore)
208-
return isLegacyPVR(pvr)
208+
return IsLegacyPVR(pvr)
209209
})
210210

211211
return ctrl.NewControllerManagedBy(mgr).Named("podvolumerestorelegacy").
@@ -229,7 +229,7 @@ func (c *PodVolumeRestoreReconcilerLegacy) findVolumeRestoresForPod(ctx context.
229229

230230
requests := []reconcile.Request{}
231231
for _, item := range list.Items {
232-
if !isLegacyPVR(&item) {
232+
if !IsLegacyPVR(&item) {
233233
continue
234234
}
235235

@@ -359,6 +359,6 @@ func (c *PodVolumeRestoreReconcilerLegacy) closeDataPath(ctx context.Context, pv
359359
c.dataPathMgr.RemoveAsyncBR(pvbName)
360360
}
361361

362-
func isLegacyPVR(pvr *velerov1api.PodVolumeRestore) bool {
362+
// IsLegacyPVR reports whether the given PodVolumeRestore is a legacy one,
// i.e. its Spec.UploaderType is the restic uploader type.
func IsLegacyPVR(pvr *velerov1api.PodVolumeRestore) bool {
	return pvr.Spec.UploaderType == uploader.ResticType
}

0 commit comments

Comments
 (0)