Skip to content

Commit 026f8cd

Browse files
committed
fix: remove unused VSErrorFirstObservedTimeAnnotation and update error handling in VolumeSnapshot progress
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>
1 parent 543b77a commit 026f8cd

3 files changed

Lines changed: 39 additions & 50 deletions

File tree

pkg/apis/velero/v1/labels_annotations.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,4 @@ const (
178178

179179
// Annotation prefix on Backup to override VGS class per CSI driver
180180
VolumeGroupSnapshotClassAnnotationBackupPrefix = "velero.io/csi-volumegroupsnapshot-class_"
181-
182-
// VSErrorFirstObservedTimeAnnotation is the annotation key used to record the time
183-
// when a VolumeSnapshot error was first observed during an async backup operation.
184-
// This is used by Progress() to detect errors that persist beyond CSISnapshotTimeout.
185-
VSErrorFirstObservedTimeAnnotation = "velero.io/vs-error-first-observed-time"
186181
)

pkg/backup/actions/csi/volumesnapshot_action.go

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -295,37 +295,13 @@ func (p *volumeSnapshotBackupItemAction) Progress(
295295
errorMessage = *vs.Status.Error.Message
296296
}
297297

298-
now := time.Now()
299-
firstObservedTime := now
300-
301-
if existingTime, ok := vs.Annotations[velerov1api.VSErrorFirstObservedTimeAnnotation]; ok {
302-
if t, err := time.Parse(time.RFC3339, existingTime); err == nil {
303-
firstObservedTime = t
304-
} else {
305-
p.log.Warnf("VolumeSnapshot %s/%s has an unparsable %s annotation value %q, resetting timer: %v",
306-
vs.Namespace, vs.Name, velerov1api.VSErrorFirstObservedTimeAnnotation, existingTime, err)
307-
}
308-
} else {
309-
// First time we observe this error — stamp the annotation onto the VS.
310-
originVS := vs.DeepCopy()
311-
kubeutil.AddAnnotations(&vs.ObjectMeta, map[string]string{
312-
velerov1api.VSErrorFirstObservedTimeAnnotation: now.Format(time.RFC3339),
313-
})
314-
if patchErr := p.crClient.Patch(
315-
context.Background(), vs, crclient.MergeFrom(originVS),
316-
); patchErr != nil {
317-
p.log.Warnf("Failed to patch VolumeSnapshot %s/%s with error first observed time: %v",
318-
vs.Namespace, vs.Name, patchErr)
319-
}
320-
}
321-
322298
timeout := backup.Spec.CSISnapshotTimeout.Duration
323-
if timeout > 0 && now.Sub(firstObservedTime) >= timeout {
299+
if timeout > 0 && time.Since(progress.Started) >= timeout {
324300
p.log.Errorf(
325301
"VolumeSnapshot %s/%s has a persistent error beyond CSISnapshotTimeout (%s): %s",
326302
vs.Namespace, vs.Name, timeout, errorMessage)
327303
progress.Completed = true
328-
progress.Updated = now
304+
progress.Updated = time.Now()
329305
progress.Err = fmt.Sprintf("VolumeSnapshot %s/%s has a persistent error: %s",
330306
vs.Namespace, vs.Name, errorMessage)
331307
return progress, nil
@@ -362,12 +338,24 @@ func (p *volumeSnapshotBackupItemAction) Progress(
362338
progress.Completed = true
363339
progress.Updated = now
364340
} else if vsc.Status.Error != nil {
365-
progress.Completed = true
366-
progress.Updated = now
341+
errorMessage := ""
367342
if vsc.Status.Error.Message != nil {
368-
progress.Err = *vsc.Status.Error.Message
343+
errorMessage = *vsc.Status.Error.Message
344+
}
345+
346+
timeout := backup.Spec.CSISnapshotTimeout.Duration
347+
if timeout > 0 && time.Since(progress.Started) >= timeout {
348+
p.log.Errorf(
349+
"VolumeSnapshotContent %s has a persistent error beyond CSISnapshotTimeout (%s): %s",
350+
vsc.Name, timeout, errorMessage)
351+
progress.Completed = true
352+
progress.Updated = now
353+
progress.Err = fmt.Sprintf("VolumeSnapshotContent %s has a persistent error: %s",
354+
vsc.Name, errorMessage)
355+
} else {
356+
p.log.Warnf("VolumeSnapshotContent %s has an error within the CSISnapshotTimeout window: %s. Snapshot controller will retry later.",
357+
vsc.Name, errorMessage)
369358
}
370-
p.log.Warnf("VolumeSnapshotContent meets an error %s.", progress.Err)
371359
}
372360
}
373361

pkg/backup/actions/csi/volumesnapshot_action_test.go

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,9 @@ func TestVSProgress(t *testing.T) {
230230
expectedErr: false,
231231
},
232232
{
233-
name: "VS status has error, annotation within CSISnapshotTimeout",
234-
operationID: "ns/name/2024-04-11T18:49:00+08:00",
235-
vs: builder.ForVolumeSnapshot("ns", "name").
236-
ObjectMeta(builder.WithAnnotations(
237-
velerov1api.VSErrorFirstObservedTimeAnnotation, time.Now().Format(time.RFC3339),
238-
)).
239-
Status().
233+
name: "VS status has error, within CSISnapshotTimeout (recent start time)",
234+
operationID: "ns/name/" + time.Now().Format(time.RFC3339),
235+
vs: builder.ForVolumeSnapshot("ns", "name").Status().
240236
StatusError(snapshotv1api.VolumeSnapshotError{
241237
Message: &errorStr,
242238
}).Result(),
@@ -246,11 +242,7 @@ func TestVSProgress(t *testing.T) {
246242
{
247243
name: "VS status has persistent error beyond CSISnapshotTimeout",
248244
operationID: "ns/name/2024-04-11T18:49:00+08:00",
249-
vs: builder.ForVolumeSnapshot("ns", "name").
250-
ObjectMeta(builder.WithAnnotations(
251-
velerov1api.VSErrorFirstObservedTimeAnnotation, "2024-04-11T18:49:00+08:00",
252-
)).
253-
Status().
245+
vs: builder.ForVolumeSnapshot("ns", "name").Status().
254246
StatusError(snapshotv1api.VolumeSnapshotError{
255247
Message: &errorStr,
256248
}).Result(),
@@ -292,7 +284,21 @@ func TestVSProgress(t *testing.T) {
292284
expectedProgress: &velero.OperationProgress{Completed: true},
293285
},
294286
{
295-
name: "VSC status has error",
287+
name: "VSC status has error within CSISnapshotTimeout",
288+
operationID: "ns/name/" + time.Now().Format(time.RFC3339),
289+
vs: builder.ForVolumeSnapshot("ns", "name").Status().
290+
ReadyToUse(true).BoundVolumeSnapshotContentName("vsc").Result(),
291+
vsc: builder.ForVolumeSnapshotContent("vsc").
292+
Status(&snapshotv1api.VolumeSnapshotContentStatus{
293+
Error: &snapshotv1api.VolumeSnapshotError{
294+
Message: &errorStr,
295+
},
296+
}).Result(),
297+
backup: builder.ForBackup("velero", "backup").CSISnapshotTimeout(10 * time.Minute).Result(),
298+
expectedErr: false,
299+
},
300+
{
301+
name: "VSC status has persistent error beyond CSISnapshotTimeout",
296302
operationID: "ns/name/2024-04-11T18:49:00+08:00",
297303
vs: builder.ForVolumeSnapshot("ns", "name").Status().
298304
ReadyToUse(true).BoundVolumeSnapshotContentName("vsc").Result(),
@@ -302,11 +308,11 @@ func TestVSProgress(t *testing.T) {
302308
Message: &errorStr,
303309
},
304310
}).Result(),
305-
backup: builder.ForBackup("velero", "backup").Result(),
311+
backup: builder.ForBackup("velero", "backup").CSISnapshotTimeout(10 * time.Minute).Result(),
306312
expectedErr: false,
307313
expectedProgress: &velero.OperationProgress{
308314
Completed: true,
309-
Err: "error",
315+
Err: fmt.Sprintf("VolumeSnapshotContent vsc has a persistent error: %s", errorStr),
310316
},
311317
},
312318
}

0 commit comments

Comments
 (0)