@@ -25,6 +25,16 @@ type snapshotHandler struct {
2525 logger loghelper.Logger
2626}
2727
28+ type updateDeleteResult struct {
29+ policyUpdated bool
30+ deleteIssued bool
31+ }
32+
33+ const (
34+ cleanupComplete = true
35+ cleanupInProgress = false
36+ )
37+
2838// createPreProvisionedVolumeSnapshot creates the pre-provisioned VolumeSnapshot
2939func (h * snapshotHandler ) createPreProvisionedVolumeSnapshot (ctx context.Context , requestLabel , requestName string , volumeSnapshotRequest volumes.SnapshotRequest ) (* snapshotsv1api.VolumeSnapshot , error ) {
3040 volumeSnapshotName := fmt .Sprintf ("%s-%s" , volumeSnapshotRequest .PersistentVolumeClaim .Name , requestName )
@@ -142,48 +152,63 @@ func (h *snapshotHandler) deleteVolumeSnapshot(ctx context.Context, requestLabel
142152 }
143153 volumeSnapshot , volumeSnapshotContent , err := h .getVolumeSnapshotResources (ctx , volumeSnapshotNamespace , volumeSnapshotName , volumeSnapshotContentName )
144154 if err != nil {
145- return false , fmt .Errorf ("failed to get volume snapshot resources for VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
155+ return cleanupInProgress , fmt .Errorf ("failed to get volume snapshot resources for VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
146156 }
147157
148158 resourceRecreated := false
149159 if volumeSnapshotContent == nil && recreateResourceIfNotFound {
150160 h .logger .Debugf ("VolumeSnapshotContent %s not found, recreate it" , volumeSnapshotContentName )
151161 _ , err = h .createVolumeSnapshotContentResource (ctx , requestLabel , requestName , volumeSnapshotRequest , snapshotHandle , snapshotsv1api .VolumeSnapshotContentDelete )
152162 if err != nil {
153- return false , fmt .Errorf ("failed to recreate VolumeSnapshotContent %s: %w" , volumeSnapshotContentName , err )
163+ return cleanupInProgress , fmt .Errorf ("failed to recreate VolumeSnapshotContent %s: %w" , volumeSnapshotContentName , err )
154164 }
155165 resourceRecreated = true
156166 }
157167 if volumeSnapshot == nil && recreateResourceIfNotFound {
158168 h .logger .Debugf ("VolumeSnapshot %s/%s not found, recreate it" , volumeSnapshotNamespace , volumeSnapshotName )
159169 _ , err = h .createPreProvisionedVolumeSnapshot (ctx , requestLabel , requestName , volumeSnapshotRequest )
160170 if err != nil {
161- return false , fmt .Errorf ("failed to recreate VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
171+ return cleanupInProgress , fmt .Errorf ("failed to recreate VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
162172 }
163173 resourceRecreated = true
164174 }
165175 if resourceRecreated {
166- return false , nil
176+ return cleanupInProgress , nil
167177 }
168178
169179 if volumeSnapshot == nil && volumeSnapshotContent == nil {
170180 // both the VolumeSnapshot and the VolumeSnapshotContent have been deleted
171- return true , nil
181+ return cleanupComplete , nil
172182 }
173183 if volumeSnapshot != nil {
174- volumeSnapshotJSON , _ := json .Marshal (volumeSnapshot )
175- h .logger .Debugf ("VolumeSnapshot %s/%s still not deleted: %s" , volumeSnapshot .Namespace , volumeSnapshot .Name , volumeSnapshotJSON )
184+ h .logger .Debugf ("VolumeSnapshot %s/%s deletion pending (deletionTimestamp=%v)" , volumeSnapshot .Namespace , volumeSnapshot .Name , volumeSnapshot .DeletionTimestamp )
176185 }
177186 if volumeSnapshotContent != nil {
178- volumeSnapshotContentJSON , _ := json .Marshal (volumeSnapshotContent )
179- h .logger .Debugf ("VolumeSnapshotContent %s still not deleted: %s" , volumeSnapshotContent .Name , volumeSnapshotContentJSON )
187+ h .logger .Debugf ("VolumeSnapshotContent %s deletion pending (policy=%s, deletionTimestamp=%v)" , volumeSnapshotContent .Name , volumeSnapshotContent .Spec .DeletionPolicy , volumeSnapshotContent .DeletionTimestamp )
180188 }
181189
182- err = h .updateAndDeleteVolumeSnapshotResource (ctx , volumeSnapshot , volumeSnapshotContent , snapshotsv1api .VolumeSnapshotContentDelete )
190+ result , err : = h .updateAndDeleteVolumeSnapshotResource (ctx , volumeSnapshot , volumeSnapshotContent , snapshotsv1api .VolumeSnapshotContentDelete )
183191 if err != nil {
184- return false , fmt .Errorf ("failed to delete volume snapshot: %w" , err )
192+ if kerrors .IsConflict (err ) {
193+ h .logger .Debugf ("Conflict deleting %s/%s (likely processed by controller), will retry: %v" , volumeSnapshotNamespace , volumeSnapshotName , err )
194+ return cleanupInProgress , nil
195+ }
196+ return cleanupInProgress , fmt .Errorf ("failed to delete volume snapshot: %w" , err )
197+ }
198+ if result .policyUpdated {
199+ h .logger .Debugf ("DeletionPolicy updated for %s/%s, waiting for controller reconciliation" , volumeSnapshotNamespace , volumeSnapshotName )
200+ return cleanupInProgress , nil
201+ }
202+ return cleanupInProgress , nil
203+ }
204+
205+ func isCleanupComplete (volumeSnapshot * snapshotsv1api.VolumeSnapshot , volumeSnapshotContent * snapshotsv1api.VolumeSnapshotContent ) bool {
206+ if volumeSnapshot == nil && volumeSnapshotContent == nil {
207+ return true
185208 }
186- return false , nil
209+ snapshotDeleting := volumeSnapshot == nil || ! volumeSnapshot .DeletionTimestamp .IsZero ()
210+ contentDeleting := volumeSnapshotContent == nil || ! volumeSnapshotContent .DeletionTimestamp .IsZero ()
211+ return snapshotDeleting && contentDeleting
187212}
188213
189214// cleanupVolumeSnapshotResource deletes the VolumeSnapshot and the VolumeSnapshotContent with the deletion policy set
@@ -192,23 +217,53 @@ func (h *snapshotHandler) deleteVolumeSnapshot(ctx context.Context, requestLabel
192217func (h * snapshotHandler ) cleanupVolumeSnapshotResource (ctx context.Context , volumeSnapshotNamespace , volumeSnapshotName string ) (bool , error ) {
193218 volumeSnapshot , volumeSnapshotContent , err := h .getVolumeSnapshotResources (ctx , volumeSnapshotNamespace , volumeSnapshotName , "" )
194219 if err != nil {
195- return false , fmt .Errorf ("failed to get volume snapshot resources for VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
220+ return cleanupInProgress , fmt .Errorf ("failed to get volume snapshot resources for VolumeSnapshot %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
196221 }
222+ // If resources are gone, cleanup is done
197223 if volumeSnapshot == nil && volumeSnapshotContent == nil {
198- return true , nil
224+ return cleanupComplete , nil
199225 }
200- err = h .updateAndDeleteVolumeSnapshotResource (ctx , volumeSnapshot , volumeSnapshotContent , snapshotsv1api .VolumeSnapshotContentRetain )
226+
227+ // Check if resources are already being deleted (have DeletionTimestamp set)
228+ // If they are, we consider them as effectively cleaned up
229+ if isCleanupComplete (volumeSnapshot , volumeSnapshotContent ) {
230+ h .logger .Debugf ("Volume snapshot resources for%s/%s are marked for deletion, cleanup considered complete" , volumeSnapshotNamespace , volumeSnapshotName )
231+ return cleanupComplete , nil
232+ }
233+
234+ result , err := h .updateAndDeleteVolumeSnapshotResource (ctx , volumeSnapshot , volumeSnapshotContent , snapshotsv1api .VolumeSnapshotContentRetain )
235+ if err != nil {
236+ if kerrors .IsConflict (err ) {
237+ h .logger .Debugf ("Conflict cleaning up volume snapshot resources for %s/%s (likely being processed), will retry: %v" , volumeSnapshotNamespace , volumeSnapshotName , err )
238+ return cleanupInProgress , nil
239+ }
240+ return cleanupInProgress , fmt .Errorf ("failed to cleanup volume snapshot resources for %s/%s: %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
241+ }
242+
243+ if result .policyUpdated {
244+ h .logger .Debugf ("DeletionPolicy updated for %s/%s, waiting for controller reconciliation" , volumeSnapshotNamespace , volumeSnapshotName )
245+ return cleanupInProgress , nil
246+ }
247+
248+ // Re-check to confirm deletion has started (DeletionTimestamp set) or resources are gone.
249+ volumeSnapshot , volumeSnapshotContent , err = h .getVolumeSnapshotResources (ctx , volumeSnapshotNamespace , volumeSnapshotName , "" )
201250 if err != nil {
202- return false , fmt .Errorf ("failed to cleanup volume snapshot resources: %w" , err )
251+ return cleanupInProgress , fmt .Errorf ("failed to recheck volume snapshot resources for VolumeSnapshot %s/%s : %w" , volumeSnapshotNamespace , volumeSnapshotName , err )
203252 }
204- return false , nil
253+
254+ if isCleanupComplete (volumeSnapshot , volumeSnapshotContent ) {
255+ return cleanupComplete , nil
256+ }
257+
258+ h .logger .Debugf ("Cleanup initiated for %s/%s but deletion timestamp not yet set. Will retry." , volumeSnapshotNamespace , volumeSnapshotName )
259+ return cleanupInProgress , nil
205260}
206261
207262func (h * snapshotHandler ) updateAndDeleteVolumeSnapshotResource (
208263 ctx context.Context ,
209264 volumeSnapshot * snapshotsv1api.VolumeSnapshot ,
210265 volumeSnapshotContent * snapshotsv1api.VolumeSnapshotContent ,
211- requiredVolumeSnapshotContentDeletionPolicy snapshotsv1api.DeletionPolicy ) error {
266+ requiredVolumeSnapshotContentDeletionPolicy snapshotsv1api.DeletionPolicy ) ( updateDeleteResult , error ) {
212267 if volumeSnapshotContent != nil &&
213268 volumeSnapshotContent .DeletionTimestamp .IsZero () &&
214269 volumeSnapshotContent .Spec .DeletionPolicy != requiredVolumeSnapshotContentDeletionPolicy {
@@ -217,16 +272,22 @@ func (h *snapshotHandler) updateAndDeleteVolumeSnapshotResource(
217272 // 2. DeletionPolicy=Delete when deleting the volume snapshots
218273 err := h .setVolumeSnapshotContentDeletionPolicy (ctx , volumeSnapshotContent .Name , requiredVolumeSnapshotContentDeletionPolicy )
219274 if err != nil {
220- return fmt .Errorf ("failed to set VolumeSnapshotContent %s DeletionPolicy to %s: %w" , volumeSnapshotContent .Name , requiredVolumeSnapshotContentDeletionPolicy , err )
275+ return updateDeleteResult {}, fmt .Errorf ("failed to set VolumeSnapshotContent %s DeletionPolicy to %s: %w" , volumeSnapshotContent .Name , requiredVolumeSnapshotContentDeletionPolicy , err )
221276 }
222- return nil
277+ return updateDeleteResult { policyUpdated : true }, nil
223278 }
224279
225- err := h .deleteVolumeSnapshotResources (ctx , volumeSnapshot , volumeSnapshotContent )
280+ err := h .deleteVolumeSnapshotObj (ctx , volumeSnapshot )
226281 if err != nil {
227- return fmt .Errorf ("failed to delete VolumeSnapshot and/or VolumeSnapshotContent : %w" , err )
282+ return updateDeleteResult {}, fmt .Errorf ("failed to delete VolumeSnapshot: %w" , err )
228283 }
229- return nil
284+
285+ err = h .deleteVolumeSnapshotContentObj (ctx , volumeSnapshot , volumeSnapshotContent )
286+ if err != nil {
287+ return updateDeleteResult {}, fmt .Errorf ("failed to delete VolumeSnapshotContent: %w" , err )
288+ }
289+
290+ return updateDeleteResult {deleteIssued : true }, nil
230291}
231292
232293func (h * snapshotHandler ) setVolumeSnapshotContentDeletionPolicy (ctx context.Context , volumeSnapshotContentName string , deletionPolicy snapshotsv1api.DeletionPolicy ) error {
@@ -246,27 +307,35 @@ func (h *snapshotHandler) setVolumeSnapshotContentDeletionPolicy(ctx context.Con
246307 return nil
247308}
248309
249- func (h * snapshotHandler ) deleteVolumeSnapshotResources (
310+ // deleteVolumeSnapshotObj deletes the VolumeSnapshot resource.
311+ func (h * snapshotHandler ) deleteVolumeSnapshotObj (
250312 ctx context.Context ,
251- volumeSnapshot * snapshotsv1api.VolumeSnapshot ,
252- volumeSnapshotContent * snapshotsv1api.VolumeSnapshotContent ) error {
313+ volumeSnapshot * snapshotsv1api.VolumeSnapshot ) error {
253314 if volumeSnapshot != nil &&
254315 volumeSnapshot .DeletionTimestamp .IsZero () {
255316 h .logger .Debugf ("Delete VolumeSnapshot %s/%s" , volumeSnapshot .Namespace , volumeSnapshot .Name )
256317 err := h .snapshotsClient .SnapshotV1 ().VolumeSnapshots (volumeSnapshot .Namespace ).Delete (ctx , volumeSnapshot .Name , metav1.DeleteOptions {})
257318 if err != nil && ! kerrors .IsNotFound (err ) {
258- // If the error is a conflict (e.g., controller is adding finalizers),
259- // it means the snapshot is being processed
319+ // If the error is a conflict, we return it to allow the reconciler to retry with backoff.
260320 if kerrors .IsConflict (err ) {
261321 h .logger .Debugf ("VolumeSnapshot %s/%s deletion conflicted (likely being processed by controller), will retry" , volumeSnapshot .Namespace , volumeSnapshot .Name )
262- } else {
263- return fmt .Errorf ("failed to delete VolumeSnapshot %s/%s: %w" , volumeSnapshot .Namespace , volumeSnapshot .Name , err )
264322 }
323+ return fmt .Errorf ("failed to delete VolumeSnapshot %s/%s: %w" , volumeSnapshot .Namespace , volumeSnapshot .Name , err )
265324 }
266325 }
326+ return nil
327+ }
328+
329+ // deleteVolumeSnapshotContentObj deletes the VolumeSnapshotContent resource.
330+ func (h * snapshotHandler ) deleteVolumeSnapshotContentObj (
331+ ctx context.Context ,
332+ volumeSnapshot * snapshotsv1api.VolumeSnapshot ,
333+ volumeSnapshotContent * snapshotsv1api.VolumeSnapshotContent ) error {
267334 if volumeSnapshotContent != nil &&
268335 volumeSnapshotContent .DeletionTimestamp .IsZero () &&
269- volumeSnapshotContent .Spec .DeletionPolicy == snapshotsv1api .VolumeSnapshotContentRetain {
336+ // If the VolumeSnapshot is already gone, the controller cannot clean up the content.
337+ // Delete it manually even when the policy is Delete to avoid orphaned content.
338+ (volumeSnapshotContent .Spec .DeletionPolicy == snapshotsv1api .VolumeSnapshotContentRetain || volumeSnapshot == nil ) {
270339 // Delete the VolumeSnapshotContent manually in case it has the Retain deletion policy.
271340 // Otherwise, the VolumeSnapshotContent resource will be deleted automatically by the snapshot-controller.
272341 // Here we have 2 cases:
@@ -277,6 +346,10 @@ func (h *snapshotHandler) deleteVolumeSnapshotResources(
277346 h .logger .Debugf ("Delete VolumeSnapshotContent %s" , volumeSnapshotContent .Name )
278347 err := h .snapshotsClient .SnapshotV1 ().VolumeSnapshotContents ().Delete (ctx , volumeSnapshotContent .Name , metav1.DeleteOptions {})
279348 if err != nil && ! kerrors .IsNotFound (err ) {
349+ // If the error is a conflict, we return it to allow the reconciler to retry with backoff.
350+ if kerrors .IsConflict (err ) {
351+ h .logger .Debugf ("VolumeSnapshotContent %s deletion conflicted (likely being processed by controller), will retry" , volumeSnapshotContent .Name )
352+ }
280353 return fmt .Errorf ("failed to delete VolumeSnapshotContent %s: %w" , volumeSnapshotContent .Name , err )
281354 }
282355 }
0 commit comments