Skip to content

Commit df2686c

Browse files
authored
Add delay to avoid race conditions during VolumeSnapshotContent deletion (#9700)
* Add delay to avoid race conditions during VolumeSnapshotContent deletion Signed-off-by: Priyansh Choudhary <im1706@gmail.com> * updated changelog Signed-off-by: Priyansh Choudhary <im1706@gmail.com> * Updated Changelog Signed-off-by: Priyansh Choudhary <im1706@gmail.com>
1 parent 8a6ac7a commit df2686c

3 files changed

Lines changed: 58 additions & 0 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix issue #9699: add a 2-second delay between the create and delete operations on the temporary CSI VolumeSnapshotContent to avoid create/delete race conditions in CSI controllers

internal/delete/actions/csi/volumesnapshotcontent_action.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package csi
1818

1919
import (
2020
"context"
21+
"time"
2122

2223
"github.com/google/uuid"
2324
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1"
@@ -40,6 +41,10 @@ type volumeSnapshotContentDeleteItemAction struct {
4041
crClient crclient.Client
4142
}
4243

44+
// tempVSCCreateDeleteGap is the pause passed to the sleep hook between
// creating the temporary VolumeSnapshotContent and deleting it.
const tempVSCCreateDeleteGap = 2 * time.Second
45+
46+
// sleepBetweenTempVSCCreateAndDelete defaults to time.Sleep. It is a
// package-level variable so that tests can replace it with a stub and
// avoid actually waiting.
var sleepBetweenTempVSCCreateAndDelete = time.Sleep
47+
4348
// AppliesTo returns information indicating
4449
// volumeSnapshotContentDeleteItemAction should be invoked
4550
// while deleting VolumeSnapshotContent.snapshot.storage.k8s.io resources
@@ -123,6 +128,9 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute(
123128
}
124129
p.log.Infof("Created temp VolumeSnapshotContent %s with DeletionPolicy=Delete to trigger cloud snapshot cleanup", snapCont.Name)
125130

131+
// Add a small delay before delete to avoid create/delete race conditions in CSI controllers.
132+
sleepBetweenTempVSCCreateAndDelete(tempVSCCreateDeleteGap)
133+
126134
// Delete the temp VSC after the short delay to trigger cloud snapshot removal.
127135
// The CSI driver will handle the actual cloud snapshot deletion.
128136
if err := p.crClient.Delete(

internal/delete/actions/csi/volumesnapshotcontent_action_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"testing"
23+
"time"
2324

2425
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1"
2526
"github.com/sirupsen/logrus"
@@ -46,6 +47,21 @@ type fakeClientWithErrors struct {
4647
deleteError error
4748
}
4849

50+
// fakeClientWithCallTracking wraps a crclient.Client and records the order
// in which Create and Delete are called. The events slice is held by
// pointer so the test can append to the same slice from outside the client.
type fakeClientWithCallTracking struct {
51+
crclient.Client
52+
events *[]string
53+
}
54+
55+
// Create appends "create" to the shared events slice and then delegates to
// the embedded client's Create, returning its error unchanged.
func (c *fakeClientWithCallTracking) Create(ctx context.Context, obj crclient.Object, opts ...crclient.CreateOption) error {
56+
*c.events = append(*c.events, "create")
57+
return c.Client.Create(ctx, obj, opts...)
58+
}
59+
60+
// Delete appends "delete" to the shared events slice and then delegates to
// the embedded client's Delete, returning its error unchanged.
func (c *fakeClientWithCallTracking) Delete(ctx context.Context, obj crclient.Object, opts ...crclient.DeleteOption) error {
61+
*c.events = append(*c.events, "delete")
62+
return c.Client.Delete(ctx, obj, opts...)
63+
}
64+
4965
func (c *fakeClientWithErrors) Get(ctx context.Context, key crclient.ObjectKey, obj crclient.Object, opts ...crclient.GetOption) error {
5066
if c.getError != nil {
5167
return c.getError
@@ -325,6 +341,39 @@ func TestTryDeleteOriginalVSC(t *testing.T) {
325341
})
326342
}
327343

344+
// TestVSCExecute_CreateSleepDeleteOrder verifies that Execute performs a
// client Create, then calls the sleep hook with tempVSCCreateDeleteGap, and
// only then performs a client Delete — in exactly that order.
func TestVSCExecute_CreateSleepDeleteOrder(t *testing.T) {
345+
snapshotHandleStr := "test"
346+
vsc := builder.ForVolumeSnapshotContent("bar").
347+
ObjectMeta(builder.WithLabelsMap(map[string]string{velerov1api.BackupNameLabel: "backup"})).
348+
Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleStr}).
349+
Result()
350+
351+
vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(vsc)
352+
require.NoError(t, err)
353+
354+
// events is the single ordered log shared by the tracking client
// ("create"/"delete") and the sleep stub below ("sleep").
events := make([]string, 0, 3)
355+
realClient := velerotest.NewFakeControllerRuntimeClient(t)
356+
trackingClient := &fakeClientWithCallTracking{Client: realClient, events: &events}
357+
358+
// Replace the package-level sleep hook for the duration of this test and
// restore the original in Cleanup so other tests are not affected.
originalSleep := sleepBetweenTempVSCCreateAndDelete
359+
t.Cleanup(func() {
360+
sleepBetweenTempVSCCreateAndDelete = originalSleep
361+
})
362+
363+
// The stub does not wait; it checks the requested duration and records
// that the hook was invoked.
sleepBetweenTempVSCCreateAndDelete = func(d time.Duration) {
364+
require.Equal(t, tempVSCCreateDeleteGap, d)
365+
events = append(events, "sleep")
366+
}
367+
368+
p := volumeSnapshotContentDeleteItemAction{log: logrus.StandardLogger(), crClient: trackingClient}
369+
err = p.Execute(&velero.DeleteItemActionExecuteInput{
370+
Item: &unstructured.Unstructured{Object: vscMap},
371+
Backup: builder.ForBackup("velero", "backup").Result(),
372+
})
373+
require.NoError(t, err)
374+
// The sleep must fall strictly between the create and the delete.
require.Equal(t, []string{"create", "sleep", "delete"}, events)
375+
}
376+
328377
// boolPtr returns a pointer to a copy of b.
func boolPtr(b bool) *bool {
329378
return &b
330379
}

0 commit comments

Comments
 (0)