Skip to content

Commit ba02916

Browse files
refactor: Optimize VSC handle readiness polling for VSS backups
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat> Signed-off-by: Scott Seago <sseago@redhat.com>
1 parent a31f4ab commit ba02916

2 files changed

Lines changed: 84 additions & 58 deletions

File tree

changelogs/unreleased/9602-sseago

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optimize VSC handle readiness polling for VSS backups

pkg/util/csi/volume_snapshot.go

Lines changed: 83 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -598,72 +598,97 @@ func WaitUntilVSCHandleIsReady(
598598
log logrus.FieldLogger,
599599
csiSnapshotTimeout time.Duration,
600600
) (*snapshotv1api.VolumeSnapshotContent, error) {
601-
// We'll wait 10m for the VSC to be reconciled polling
602-
// every 5s unless backup's csiSnapshotTimeout is set
603-
interval := 5 * time.Second
601+
// We'll wait for the VSC to be reconciled, polling at a fast interval for the
602+
// first 10 seconds, then at a slower interval for up to the full csiSnapshotTimeout.
604603
vsc := new(snapshotv1api.VolumeSnapshotContent)
604+
var interval time.Duration
605+
606+
pollFunc := func(ctx context.Context) (bool, error) {
607+
vs := new(snapshotv1api.VolumeSnapshot)
608+
if err := crClient.Get(
609+
ctx,
610+
crclient.ObjectKeyFromObject(volSnap),
611+
vs,
612+
); err != nil {
613+
return false,
614+
errors.Wrapf(
615+
err,
616+
"failed to get volumesnapshot %s/%s",
617+
volSnap.Namespace, volSnap.Name,
618+
)
619+
}
620+
621+
if vs.Status == nil || vs.Status.BoundVolumeSnapshotContentName == nil {
622+
log.Infof("Waiting for CSI driver to reconcile volumesnapshot %s/%s. Retrying in %ds",
623+
volSnap.Namespace, volSnap.Name, interval/time.Second)
624+
return false, nil
625+
}
626+
627+
if err := crClient.Get(
628+
ctx,
629+
crclient.ObjectKey{
630+
Name: *vs.Status.BoundVolumeSnapshotContentName,
631+
},
632+
vsc,
633+
); err != nil {
634+
return false,
635+
errors.Wrapf(
636+
err,
637+
"failed to get VolumeSnapshotContent %s for VolumeSnapshot %s/%s",
638+
*vs.Status.BoundVolumeSnapshotContentName, vs.Namespace, vs.Name,
639+
)
640+
}
641+
642+
// we need to wait for the VolumeSnapshotContent
643+
// to have a snapshot handle because during restore,
644+
// we'll use that snapshot handle as the source for
645+
// the VolumeSnapshotContent so it's statically
646+
// bound to the existing snapshot.
647+
if vsc.Status == nil ||
648+
vsc.Status.SnapshotHandle == nil {
649+
log.Infof(
650+
"Waiting for VolumeSnapshotContents %s to have snapshot handle. Retrying in %ds",
651+
vsc.Name, interval/time.Second)
652+
if vsc.Status != nil &&
653+
vsc.Status.Error != nil {
654+
log.Warnf("VolumeSnapshotContent %s has error: %v",
655+
vsc.Name, *vsc.Status.Error.Message)
656+
}
657+
return false, nil
658+
}
659+
660+
return true, nil
661+
}
605662

663+
// Microsoft VSS backups have a hard-coded unfreeze call after 10 seconds,
664+
// so the first phase polls every second for at most 10 seconds to detect
665+
// the snapshot handle as early as possible within that window.
666+
// If the handle is not ready by then, the slower poll below takes over.
667+
interval = 1 * time.Second
668+
timeout := 10 * time.Second
606669
err := wait.PollUntilContextTimeout(
607670
context.Background(),
608671
interval,
609-
csiSnapshotTimeout,
672+
timeout,
610673
true,
611-
func(ctx context.Context) (bool, error) {
612-
vs := new(snapshotv1api.VolumeSnapshot)
613-
if err := crClient.Get(
614-
ctx,
615-
crclient.ObjectKeyFromObject(volSnap),
616-
vs,
617-
); err != nil {
618-
return false,
619-
errors.Wrapf(
620-
err,
621-
"failed to get volumesnapshot %s/%s",
622-
volSnap.Namespace, volSnap.Name,
623-
)
624-
}
625-
626-
if vs.Status == nil || vs.Status.BoundVolumeSnapshotContentName == nil {
627-
log.Infof("Waiting for CSI driver to reconcile volumesnapshot %s/%s. Retrying in %ds",
628-
volSnap.Namespace, volSnap.Name, interval/time.Second)
629-
return false, nil
630-
}
631-
632-
if err := crClient.Get(
633-
ctx,
634-
crclient.ObjectKey{
635-
Name: *vs.Status.BoundVolumeSnapshotContentName,
636-
},
637-
vsc,
638-
); err != nil {
639-
return false,
640-
errors.Wrapf(
641-
err,
642-
"failed to get VolumeSnapshotContent %s for VolumeSnapshot %s/%s",
643-
*vs.Status.BoundVolumeSnapshotContentName, vs.Namespace, vs.Name,
644-
)
645-
}
674+
pollFunc,
675+
)
646676

647-
// we need to wait for the VolumeSnapshotContent
648-
// to have a snapshot handle because during restore,
649-
// we'll use that snapshot handle as the source for
650-
// the VolumeSnapshotContent so it's statically
651-
// bound to the existing snapshot.
652-
if vsc.Status == nil ||
653-
vsc.Status.SnapshotHandle == nil {
654-
log.Infof(
655-
"Waiting for VolumeSnapshotContents %s to have snapshot handle. Retrying in %ds",
656-
vsc.Name, interval/time.Second)
657-
if vsc.Status != nil &&
658-
vsc.Status.Error != nil {
659-
log.Warnf("VolumeSnapshotContent %s has error: %v",
660-
vsc.Name, *vsc.Status.Error.Message)
661-
}
662-
return false, nil
663-
}
677+
if err == nil {
678+
return vsc, nil
679+
}
680+
if !wait.Interrupted(err) {
681+
return nil, err
682+
}
664683

665-
return true, nil
666-
},
684+
// If the first poll timed out, poll with a longer interval and the full timeout.
685+
interval = 5 * time.Second
686+
err = wait.PollUntilContextTimeout(
687+
context.Background(),
688+
interval,
689+
csiSnapshotTimeout,
690+
true,
691+
pollFunc,
667692
)
668693

669694
if err != nil {

0 commit comments

Comments
 (0)