Skip to content

Commit 9c48738

Browse files
Merge pull request #49 from mpryc/issue_32
fix: stop deleting VMBT between backups to preserve checkpoint chain
2 parents b41d776 + 43879ae commit 9c48738

4 files changed

Lines changed: 234 additions & 107 deletions

File tree

internal/controller/kubevirt_dataupload_controller.go

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -835,9 +835,12 @@ func (r *KubeVirtDataUploadReconciler) cleanupDatamoverResources(ctx context.Con
835835

836836
}
837837

838-
// cleanupVMBackupResources deletes VMB and VMBT CRs in the VM namespace.
838+
// cleanupVMBackupResources deletes VMB CRs in the VM namespace.
839839
// Used during cancellation when the datamover pod won't run its own cleanup.
840-
func (r *KubeVirtDataUploadReconciler) cleanupVMBackupResources(ctx context.Context, logger logr.Logger, du *velerov2alpha1.DataUpload, vmName, vmNamespace string) {
840+
// The VMBT is intentionally preserved so KubeVirt can use it during VM lifecycle
841+
// events (restarts, migrations) to redefine libvirt checkpoints.
842+
// See https://github.com/migtools/kubevirt-datamover-controller/issues/32.
843+
func (r *KubeVirtDataUploadReconciler) cleanupVMBackupResources(ctx context.Context, logger logr.Logger, du *velerov2alpha1.DataUpload, vmNamespace string) {
841844
vmbList := &kubevirtbackupv1alpha1.VirtualMachineBackupList{}
842845
if err := r.List(ctx, vmbList, client.InNamespace(vmNamespace), client.MatchingLabels{common.LabelDataUploadUID: string(du.UID)}); err != nil {
843846
logger.Error(err, "Failed to list VMBs for cleanup")
@@ -851,20 +854,6 @@ func (r *KubeVirtDataUploadReconciler) cleanupVMBackupResources(ctx context.Cont
851854
}
852855
}
853856
}
854-
855-
vmbtList := &kubevirtbackupv1alpha1.VirtualMachineBackupTrackerList{}
856-
if err := r.List(ctx, vmbtList, client.InNamespace(vmNamespace), client.MatchingLabels{common.LabelVMNameHash: common.HashForLabel(vmName)}); err != nil {
857-
logger.Error(err, "Failed to list VMBTs for cleanup")
858-
} else {
859-
for i := range vmbtList.Items {
860-
vmbt := &vmbtList.Items[i]
861-
if err := r.Delete(ctx, vmbt); err != nil && !errors.IsNotFound(err) {
862-
logger.Error(err, "Failed to delete VMBT", "vmbt", vmbt.Name)
863-
} else {
864-
logger.Info("Deleted VMBT", "vmbt", vmbt.Name, "namespace", vmNamespace)
865-
}
866-
}
867-
}
868857
}
869858

870859
// handleCanceling processes DataUploads in Canceling phase
@@ -882,7 +871,7 @@ func (r *KubeVirtDataUploadReconciler) handleCanceling(ctx context.Context, logg
882871
// When canceling, the datamover pod won't run its cleanup, so we handle it here.
883872
vmRef, _ := common.GetVMReference(du)
884873
if vmRef != nil {
885-
r.cleanupVMBackupResources(ctx, logger, du, vmRef.Name, vmRef.Namespace)
874+
r.cleanupVMBackupResources(ctx, logger, du, vmRef.Namespace)
886875
}
887876

888877
if err := r.updatePhase(ctx, du, velerov2alpha1.DataUploadPhaseCanceled, "DataUpload canceled"); err != nil {
@@ -1011,48 +1000,27 @@ func (r *KubeVirtDataUploadReconciler) ensureTempPVC(ctx context.Context, logger
10111000
return pvc, nil
10121001
}
10131002

1014-
// prepareVMBackupTracker creates a VirtualMachineBackupTracker for the VM,
1015-
// restoring the LatestCheckpoint from the archived VMBT in S3 if available.
1016-
// Existing VMBTs are deleted first unless they are still referenced by an active
1017-
// (non-terminal) VMB, to avoid destroying VMBTs used by concurrent DataUploads.
1003+
// prepareVMBackupTracker returns an existing on-cluster VirtualMachineBackupTracker
1004+
// for the VM if one exists, or creates a new one (restoring LatestCheckpoint from
1005+
// S3 if available). The VMBT is left on-cluster between backups so KubeVirt can
1006+
// use it during VM lifecycle events (restarts, migrations) to redefine libvirt
1007+
// checkpoints. See https://github.com/migtools/kubevirt-datamover-controller/issues/32.
10181008
func (r *KubeVirtDataUploadReconciler) prepareVMBackupTracker(ctx context.Context, logger logr.Logger, du *velerov2alpha1.DataUpload, vmName, vmNamespace string) (*kubevirtbackupv1alpha1.VirtualMachineBackupTracker, error) {
1019-
// Delete existing VMBTs for this VM before creating a new one, but only if
1020-
// they are not still referenced by an active (non-terminal) VMB. This prevents
1021-
// concurrent DataUploads from destroying each other's VMBTs.
1009+
// Check for an existing on-cluster VMBT for this VM.
10221010
existingVMBTList := &kubevirtbackupv1alpha1.VirtualMachineBackupTrackerList{}
10231011
if err := r.List(ctx, existingVMBTList, client.InNamespace(vmNamespace), client.MatchingLabels{common.LabelVMNameHash: common.HashForLabel(vmName)}); err != nil {
10241012
return nil, fmt.Errorf("failed to list existing VMBTs: %w", err)
10251013
}
10261014

1027-
// Pre-fetch all VMBs in the namespace to check references
1028-
allVMBs := &kubevirtbackupv1alpha1.VirtualMachineBackupList{}
1029-
if err := r.List(ctx, allVMBs, client.InNamespace(vmNamespace)); err != nil {
1030-
return nil, fmt.Errorf("failed to list VMBs: %w", err)
1031-
}
1032-
1033-
for i := range existingVMBTList.Items {
1034-
vmbtToDelete := &existingVMBTList.Items[i]
1035-
1036-
// Check if any non-terminal VMB still references this VMBT
1037-
referenced := false
1038-
for j := range allVMBs.Items {
1039-
if allVMBs.Items[j].Spec.Source.Name == vmbtToDelete.Name && !isVMBTerminal(&allVMBs.Items[j]) {
1040-
referenced = true
1041-
break
1042-
}
1043-
}
1044-
if referenced {
1045-
logger.Info("Skipping VMBT deletion, still referenced by active VMB",
1046-
"vmbt", vmbtToDelete.Name)
1047-
continue
1048-
}
1049-
1050-
logger.Info("Deleting existing VMBT before recreation", "vmbt", vmbtToDelete.Name)
1051-
if err := r.Delete(ctx, vmbtToDelete); err != nil && !errors.IsNotFound(err) {
1052-
return nil, fmt.Errorf("failed to delete existing VMBT %s: %w", vmbtToDelete.Name, err)
1053-
}
1015+
// Reuse the on-cluster VMBT if one exists. KubeVirt needs it to persist
1016+
// across backups for checkpoint redefinition during VM lifecycle events.
1017+
if len(existingVMBTList.Items) > 0 {
1018+
vmbt := &existingVMBTList.Items[0]
1019+
logger.Info("Reusing existing on-cluster VMBT", "vmbt", vmbt.Name)
1020+
return vmbt, nil
10541021
}
10551022

1023+
// No VMBT on-cluster (e.g., first backup or namespace was recreated).
10561024
// Try to fetch the archived VMBT from S3 to restore LatestCheckpoint.
10571025
// This is non-fatal: if BSL is unreachable or no VMBT exists yet (first backup),
10581026
// we create a fresh VMBT without a checkpoint.

internal/controller/kubevirt_dataupload_controller_test.go

Lines changed: 133 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,15 +1395,15 @@ func TestHandleAccepted_ProceedsWhenOlderDUCompleted(t *testing.T) {
13951395
// It will proceed into prepareVMBackupTracker, which will fail because
13961396
// BSL credentials aren't fully set up in this test. That's fine — we're
13971397
// testing that the serialization guard does NOT block.
1398-
// The important assertion is that DU-1's VMBT gets deleted (normal cleanup).
1399-
deletedVMBT := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
1398+
// The important assertion is that DU-1's VMBT is reused (not deleted) per issue #32.
1399+
reusedVMBT := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
14001400
getErr := fakeClient.Get(context.Background(), types.NamespacedName{
14011401
Name: vmbt1.Name, Namespace: vmNamespace,
1402-
}, deletedVMBT)
1402+
}, reusedVMBT)
14031403

1404-
// prepareVMBackupTracker should have deleted DU-1's old VMBT
1405-
if getErr == nil {
1406-
t.Error("expected DU-1's VMBT to be deleted by prepareVMBackupTracker, but it still exists")
1404+
// prepareVMBackupTracker should have reused DU-1's existing VMBT
1405+
if getErr != nil {
1406+
t.Errorf("expected DU-1's VMBT to be reused by prepareVMBackupTracker, but it was deleted: %v", getErr)
14071407
}
14081408
// We don't care about the error from handleAccepted itself — it may fail
14091409
// on BSL credential lookup. The point is it got past the serialization guard.
@@ -5513,16 +5513,17 @@ func TestPrepareVMBackupTracker_FromS3(t *testing.T) {
55135513
}
55145514
}
55155515

5516-
func TestPrepareVMBackupTracker_DeletesExisting(t *testing.T) {
5517-
// VMBT already exists in cluster → should be deleted and recreated
5516+
func TestPrepareVMBackupTracker_ReusesExisting(t *testing.T) {
5517+
// VMBT already exists in cluster → should be reused (not deleted and recreated).
5518+
// Issue #32: VMBT must persist on-cluster for KubeVirt lifecycle events.
55185519
scheme := runtime.NewScheme()
55195520
_ = velerov2alpha1.AddToScheme(scheme)
55205521
_ = kubevirtbackupv1alpha1.AddToScheme(scheme)
55215522
_ = corev1.AddToScheme(scheme)
55225523

55235524
vmName := "test-vm"
55245525
vmNamespace := "test-ns"
5525-
duName := "test-du-delete"
5526+
duName := "test-du-reuse"
55265527

55275528
du := &velerov2alpha1.DataUpload{
55285529
ObjectMeta: metav1.ObjectMeta{
@@ -5538,13 +5539,12 @@ func TestPrepareVMBackupTracker_DeletesExisting(t *testing.T) {
55385539
},
55395540
}
55405541

5541-
// Pre-create an existing VMBT with stale label
5542+
// Pre-create an existing VMBT
55425543
existingVMBT := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{
55435544
ObjectMeta: metav1.ObjectMeta{
55445545
Name: "vmbt-" + vmName + "-old",
55455546
Namespace: vmNamespace,
55465547
Labels: map[string]string{
5547-
"stale-label": "old-value",
55485548
common.LabelVMNameHash: common.HashForLabel(vmName),
55495549
},
55505550
},
@@ -5574,22 +5574,27 @@ func TestPrepareVMBackupTracker_DeletesExisting(t *testing.T) {
55745574
t.Fatalf("unexpected error: %v", err)
55755575
}
55765576

5577-
// Verify new VMBT was created (should have our label, not the stale one)
5578-
if vmbt.Labels["stale-label"] == "old-value" {
5579-
t.Error("expected old VMBT to be deleted and new one created, but old labels persist")
5580-
}
5581-
if vmbt.Annotations[common.AnnotationDataUploadName] != duName {
5582-
t.Errorf("expected new VMBT to have DataUpload annotation %q, got %q",
5583-
duName, vmbt.Annotations[common.AnnotationDataUploadName])
5577+
// Verify the existing VMBT was reused (same name)
5578+
if vmbt.Name != existingVMBT.Name {
5579+
t.Errorf("expected existing VMBT %q to be reused, got %q", existingVMBT.Name, vmbt.Name)
55845580
}
55855581
if vmbt.Labels[common.LabelVMNameHash] != common.HashForLabel(vmName) {
5586-
t.Errorf("expected new VMBT to have label %s, got %q",
5582+
t.Errorf("expected VMBT to have label %s, got %q",
55875583
common.LabelVMNameHash, vmbt.Labels[common.LabelVMNameHash])
55885584
}
5585+
5586+
// Verify the VMBT still exists on-cluster
5587+
preserved := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
5588+
if err := fakeClient.Get(context.Background(), types.NamespacedName{
5589+
Name: existingVMBT.Name, Namespace: vmNamespace,
5590+
}, preserved); err != nil {
5591+
t.Errorf("existing VMBT was deleted when it should have been reused: %v", err)
5592+
}
55895593
}
55905594

5591-
func TestPrepareVMBackupTracker_SkipsReferencedVMBT(t *testing.T) {
5592-
// A VMBT referenced by a non-terminal VMB must NOT be deleted.
5595+
func TestPrepareVMBackupTracker_ReusesVMBTEvenIfReferencedByActiveVMB(t *testing.T) {
5596+
// A VMBT referenced by a non-terminal VMB is still reused (not deleted).
5597+
// Issue #32: VMBTs are never deleted — they persist on-cluster.
55935598
scheme := runtime.NewScheme()
55945599
_ = velerov2alpha1.AddToScheme(scheme)
55955600
_ = kubevirtbackupv1alpha1.AddToScheme(scheme)
@@ -5668,25 +5673,26 @@ func TestPrepareVMBackupTracker_SkipsReferencedVMBT(t *testing.T) {
56685673
t.Fatalf("unexpected error: %v", err)
56695674
}
56705675

5671-
// New VMBT should be created
5676+
// The existing VMBT should be reused
56725677
if vmbt == nil {
5673-
t.Fatal("expected new VMBT to be created")
5678+
t.Fatal("expected VMBT to be returned")
56745679
}
5675-
if vmbt.Name == otherVMBT.Name {
5676-
t.Error("new VMBT should not be the same object as the protected VMBT")
5680+
if vmbt.Name != otherVMBT.Name {
5681+
t.Errorf("expected existing VMBT %q to be reused, got %q", otherVMBT.Name, vmbt.Name)
56775682
}
56785683

5679-
// The other VMBT must NOT be deleted
5684+
// The VMBT must still exist on-cluster
56805685
preserved := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
56815686
if err := fakeClient.Get(context.Background(), types.NamespacedName{
56825687
Name: otherVMBT.Name, Namespace: vmNamespace,
56835688
}, preserved); err != nil {
5684-
t.Errorf("referenced VMBT was deleted when it should have been preserved: %v", err)
5689+
t.Errorf("VMBT was deleted when it should have been preserved: %v", err)
56855690
}
56865691
}
56875692

5688-
func TestPrepareVMBackupTracker_DeletesVMBTWithTerminalVMB(t *testing.T) {
5689-
// A VMBT referenced by a terminal VMB (Done=True) should be deleted normally.
5693+
func TestPrepareVMBackupTracker_ReusesVMBTWithTerminalVMB(t *testing.T) {
5694+
// A VMBT referenced by a terminal VMB (Done=True) should be reused.
5695+
// Issue #32: VMBTs are never deleted — they persist on-cluster.
56905696
scheme := runtime.NewScheme()
56915697
_ = velerov2alpha1.AddToScheme(scheme)
56925698
_ = kubevirtbackupv1alpha1.AddToScheme(scheme)
@@ -5765,17 +5771,21 @@ func TestPrepareVMBackupTracker_DeletesVMBTWithTerminalVMB(t *testing.T) {
57655771
OADPNamespace: vmNamespace,
57665772
}
57675773

5768-
_, err := r.prepareVMBackupTracker(context.Background(), logr.Discard(), du, vmName, vmNamespace)
5774+
vmbt, err := r.prepareVMBackupTracker(context.Background(), logr.Discard(), du, vmName, vmNamespace)
57695775
if err != nil {
57705776
t.Fatalf("unexpected error: %v", err)
57715777
}
57725778

5773-
// The old VMBT should be deleted (its VMB is terminal)
5774-
deleted := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
5779+
// The old VMBT should be reused (not deleted)
5780+
if vmbt.Name != oldVMBT.Name {
5781+
t.Errorf("expected VMBT %q to be reused, got %q", oldVMBT.Name, vmbt.Name)
5782+
}
5783+
5784+
preserved := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
57755785
if err := fakeClient.Get(context.Background(), types.NamespacedName{
57765786
Name: oldVMBT.Name, Namespace: vmNamespace,
5777-
}, deleted); err == nil {
5778-
t.Error("expected old VMBT to be deleted (VMB is terminal), but it still exists")
5787+
}, preserved); err != nil {
5788+
t.Errorf("VMBT was deleted when it should have been preserved: %v", err)
57795789
}
57805790
}
57815791

@@ -6021,3 +6031,92 @@ func TestSafeGenerateNamePrefix(t *testing.T) {
60216031
})
60226032
}
60236033
}
6034+
6035+
// TestCleanupVMBackupResources_PreservesVMBT verifies that cleanupVMBackupResources
6036+
// deletes the VMB but preserves the VMBT on-cluster (issue #32).
6037+
func TestCleanupVMBackupResources_PreservesVMBT(t *testing.T) {
6038+
scheme := runtime.NewScheme()
6039+
_ = velerov2alpha1.AddToScheme(scheme)
6040+
_ = kubevirtbackupv1alpha1.AddToScheme(scheme)
6041+
_ = corev1.AddToScheme(scheme)
6042+
6043+
vmName := "test-vm"
6044+
vmNamespace := "test-ns"
6045+
6046+
du := &velerov2alpha1.DataUpload{
6047+
ObjectMeta: metav1.ObjectMeta{
6048+
Name: "test-du-cleanup",
6049+
Namespace: "openshift-adp",
6050+
UID: types.UID("cleanup-uid"),
6051+
},
6052+
Spec: velerov2alpha1.DataUploadSpec{
6053+
DataMover: common.DataMoverKubeVirt,
6054+
},
6055+
}
6056+
6057+
// VMB that should be deleted during cleanup
6058+
vmb := &kubevirtbackupv1alpha1.VirtualMachineBackup{
6059+
ObjectMeta: metav1.ObjectMeta{
6060+
Name: "vmb-test-cleanup",
6061+
Namespace: vmNamespace,
6062+
Labels: map[string]string{
6063+
common.LabelDataUploadUID: string(du.UID),
6064+
},
6065+
},
6066+
Spec: kubevirtbackupv1alpha1.VirtualMachineBackupSpec{
6067+
Source: corev1.TypedLocalObjectReference{
6068+
APIGroup: strPtr("backup.kubevirt.io"),
6069+
Kind: "VirtualMachineBackupTracker",
6070+
Name: "vmbt-test-cleanup",
6071+
},
6072+
},
6073+
}
6074+
6075+
// VMBT that should be PRESERVED during cleanup
6076+
vmbt := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{
6077+
ObjectMeta: metav1.ObjectMeta{
6078+
Name: "vmbt-test-cleanup",
6079+
Namespace: vmNamespace,
6080+
Labels: map[string]string{
6081+
common.LabelVMNameHash: common.HashForLabel(vmName),
6082+
},
6083+
},
6084+
Spec: kubevirtbackupv1alpha1.VirtualMachineBackupTrackerSpec{
6085+
Source: corev1.TypedLocalObjectReference{
6086+
APIGroup: strPtr("kubevirt.io"),
6087+
Kind: "VirtualMachine",
6088+
Name: vmName,
6089+
},
6090+
},
6091+
}
6092+
6093+
fakeClient := fake.NewClientBuilder().
6094+
WithScheme(scheme).
6095+
WithObjects(du, vmb, vmbt).
6096+
Build()
6097+
6098+
r := &KubeVirtDataUploadReconciler{
6099+
Client: fakeClient,
6100+
Scheme: scheme,
6101+
Log: logr.Discard(),
6102+
OADPNamespace: "openshift-adp",
6103+
}
6104+
6105+
r.cleanupVMBackupResources(context.Background(), logr.Discard(), du, vmNamespace)
6106+
6107+
// VMB should be deleted
6108+
deletedVMB := &kubevirtbackupv1alpha1.VirtualMachineBackup{}
6109+
if err := fakeClient.Get(context.Background(), types.NamespacedName{
6110+
Name: vmb.Name, Namespace: vmNamespace,
6111+
}, deletedVMB); err == nil {
6112+
t.Error("VMB should have been deleted during cleanup, but it still exists")
6113+
}
6114+
6115+
// VMBT should be PRESERVED (not deleted)
6116+
preservedVMBT := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
6117+
if err := fakeClient.Get(context.Background(), types.NamespacedName{
6118+
Name: vmbt.Name, Namespace: vmNamespace,
6119+
}, preservedVMBT); err != nil {
6120+
t.Errorf("VMBT should have been preserved during cleanup, but it was deleted: %v", err)
6121+
}
6122+
}

pkg/uploader/run.go

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -635,10 +635,11 @@ func reconstructVMBT(cfg *UploaderConfig) *kubevirtbackupv1alpha1.VirtualMachine
635635
}
636636
}
637637

638-
// cleanupKubeResources deletes VMB and VMBT CRs from the cluster after they have
639-
// been archived to S3. This is non-fatal — if deletion fails, we log a warning
640-
// but don't fail the upload. Stale CRs will be cleaned up on the next backup cycle
641-
// (prepareVMBackupTracker deletes before recreating).
638+
// cleanupKubeResources deletes VMB CRs from the cluster after they have been
639+
// archived to S3. The VMBT is intentionally preserved so KubeVirt can use it
640+
// during VM lifecycle events (restarts, migrations) to redefine libvirt checkpoints.
641+
// This is non-fatal — if deletion fails, we log a warning but don't fail the upload.
642+
// See https://github.com/migtools/kubevirt-datamover-controller/issues/32.
642643
func cleanupKubeResources(ctx context.Context, k8sClient client.Client, cfg *UploaderConfig, logger logr.Logger) {
643644
// Delete VMB by constructing a minimal object with just name/namespace.
644645
// No need to Get first — Delete with NotFound is a no-op.
@@ -655,21 +656,6 @@ func cleanupKubeResources(ctx context.Context, k8sClient client.Client, cfg *Upl
655656
logger.Info("Deleted VMB from cluster", "vmb", cfg.VMNamespace+"/"+cfg.VMBName)
656657
}
657658
}
658-
659-
// Delete VMBT
660-
if cfg.VMBTName != "" {
661-
vmbt := &kubevirtbackupv1alpha1.VirtualMachineBackupTracker{}
662-
vmbt.Name = cfg.VMBTName
663-
vmbt.Namespace = cfg.VMNamespace
664-
if err := k8sClient.Delete(ctx, vmbt); err != nil {
665-
if !apierrors.IsNotFound(err) {
666-
logger.Info("Failed to delete VMBT from cluster",
667-
"vmbt", cfg.VMNamespace+"/"+cfg.VMBTName, "reason", err.Error())
668-
}
669-
} else {
670-
logger.Info("Deleted VMBT from cluster", "vmbt", cfg.VMNamespace+"/"+cfg.VMBTName)
671-
}
672-
}
673659
}
674660

675661
// newKubeClient creates an in-cluster Kubernetes client with KubeVirt backup types registered.

0 commit comments

Comments
 (0)