Skip to content

Commit 1423c20

Browse files
committed
fix: do not restore PV verbatim when volume data was skipped via VolumePolicy
When a VolumePolicy action=skip is used during backup, the volume data is intentionally not backed up. During restore, Velero was restoring the original PV identity (same VolumeHandle) which is dangerous: - With Delete reclaim policy: underlying storage may no longer exist - In cross-cluster restore: two clusters would share the same storage This adds defense-in-depth in pv_restorer.go: when no snapshot is found and the PV has a Delete reclaim policy, return errPVNeedsReprovisioning to trigger dynamic re-provisioning instead of restoring the PV as-is. The callers in restore.go (both new and legacy paths) handle this error by inserting the PV into pvsToProvision. For Retain reclaim policy PVs without snapshots, a warning is now logged about the risks of cross-cluster restore scenarios. Fixes #9318 Signed-off-by: Mateen Ali Anjum <mateenali66@gmail.com>
1 parent 245525c commit 1423c20

3 files changed

Lines changed: 49 additions & 0 deletions

File tree

pkg/restore/pv_restorer.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package restore
1818

1919
import (
2020
"context"
21+
"fmt"
2122

2223
"github.com/pkg/errors"
2324
"github.com/sirupsen/logrus"
@@ -31,6 +32,11 @@ import (
3132
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
3233
)
3334

35+
// errPVNeedsReprovisioning is returned when a PV has no snapshot and a Delete
36+
// reclaim policy, indicating the underlying storage may no longer exist and the
37+
// PV should be dynamically re-provisioned instead of restored as-is.
38+
var errPVNeedsReprovisioning = fmt.Errorf("persistent volume has no snapshot and Delete reclaim policy; it should be dynamically re-provisioned")
39+
3440
type PVRestorer interface {
3541
executePVAction(obj *unstructured.Unstructured) (*unstructured.Unstructured, error)
3642
}
@@ -65,6 +71,10 @@ func (r *pvRestorer) executePVAction(obj *unstructured.Unstructured) (*unstructu
6571
}
6672
if snapshotInfo == nil {
6773
log.Infof("No snapshot found for persistent volume")
74+
if hasDeleteReclaimPolicy(obj.Object) {
75+
log.Warnf("Persistent volume has Delete reclaim policy but no snapshot; underlying storage may no longer exist, triggering dynamic re-provisioning")
76+
return nil, errPVNeedsReprovisioning
77+
}
6878
return obj, nil
6979
}
7080

pkg/restore/pv_restorer_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ func TestExecutePVAction_NoSnapshotRestores(t *testing.T) {
4949
volumeSnapshots []*volume.Snapshot
5050
locations []*api.VolumeSnapshotLocation
5151
expectedErr bool
52+
expectedErrIs error
5253
expectedRes *unstructured.Unstructured
5354
}{
5455
{
@@ -112,6 +113,23 @@ func TestExecutePVAction_NoSnapshotRestores(t *testing.T) {
112113
},
113114
expectedRes: newTestUnstructured().WithName("pv-1").WithSpec().Unstructured,
114115
},
116+
{
117+
name: "no snapshot and Delete reclaim policy: return errPVNeedsReprovisioning",
118+
obj: newTestUnstructured().WithName("pv-1").WithSpec().WithSpecField("persistentVolumeReclaimPolicy", "Delete").Unstructured,
119+
restore: builder.ForRestore(api.DefaultNamespace, "").RestorePVs(true).Result(),
120+
backup: defaultBackup().Result(),
121+
volumeSnapshots: []*volume.Snapshot{},
122+
expectedErr: true,
123+
expectedErrIs: errPVNeedsReprovisioning,
124+
},
125+
{
126+
name: "no snapshot and Retain reclaim policy: return PV as-is",
127+
obj: newTestUnstructured().WithName("pv-1").WithSpec().WithSpecField("persistentVolumeReclaimPolicy", "Retain").Unstructured,
128+
restore: builder.ForRestore(api.DefaultNamespace, "").RestorePVs(true).Result(),
129+
backup: defaultBackup().Result(),
130+
volumeSnapshots: []*volume.Snapshot{},
131+
expectedRes: newTestUnstructured().WithName("pv-1").WithSpec().WithSpecField("persistentVolumeReclaimPolicy", "Retain").Unstructured,
132+
},
115133
}
116134

117135
for _, tc := range tests {
@@ -135,6 +153,9 @@ func TestExecutePVAction_NoSnapshotRestores(t *testing.T) {
135153
case true:
136154
assert.Nil(t, res)
137155
require.Error(t, err)
156+
if tc.expectedErrIs != nil {
157+
assert.Equal(t, tc.expectedErrIs, err)
158+
}
138159
case false:
139160
assert.Equal(t, tc.expectedRes, res)
140161
assert.NoError(t, err)

pkg/restore/restore.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,11 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
12231223
case volume.NativeSnapshot:
12241224
obj, err = ctx.handlePVHasNativeSnapshot(obj, resourceClient)
12251225
if err != nil {
1226+
if err == errPVNeedsReprovisioning {
1227+
restoreLogger.Infof("Dynamically re-provisioning persistent volume because it has Delete reclaim policy and no snapshot data.")
1228+
ctx.pvsToProvision.Insert(backupResourceName)
1229+
return warnings, errs, itemExists
1230+
}
12261231
errs.Add(namespace, err)
12271232
return warnings, errs, itemExists
12281233
}
@@ -1270,6 +1275,11 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
12701275
case hasSnapshot(backupResourceName, ctx.volumeSnapshots):
12711276
obj, err = ctx.handlePVHasNativeSnapshot(obj, resourceClient)
12721277
if err != nil {
1278+
if err == errPVNeedsReprovisioning {
1279+
restoreLogger.Infof("Dynamically re-provisioning persistent volume because it has Delete reclaim policy and no snapshot data.")
1280+
ctx.pvsToProvision.Insert(backupResourceName)
1281+
return warnings, errs, itemExists
1282+
}
12731283
errs.Add(namespace, err)
12741284
return warnings, errs, itemExists
12751285
}
@@ -2557,6 +2567,9 @@ func (ctx *restoreContext) handlePVHasNativeSnapshot(obj *unstructured.Unstructu
25572567
ctx.log.Infof("Restoring persistent volume from snapshot.")
25582568
retObj, err = ctx.pvRestorer.executePVAction(retObj)
25592569
if err != nil {
2570+
if err == errPVNeedsReprovisioning {
2571+
return nil, err
2572+
}
25602573
return nil, fmt.Errorf("error executing PVAction for %s: %v", getResourceID(kuberesource.PersistentVolumes, "", oldName), err)
25612574
}
25622575

@@ -2600,6 +2613,11 @@ func (ctx *restoreContext) handleSkippedPVHasRetainPolicy(
26002613
logger logrus.FieldLogger,
26012614
) (*unstructured.Unstructured, error) {
26022615
logger.Infof("Restoring persistent volume as-is because it doesn't have a snapshot and its reclaim policy is not Delete.")
2616+
logger.Warnf("Warning: restoring PV %s with its original volume identity (VolumeHandle). "+
2617+
"If this volume's data was intentionally skipped during backup (e.g. via VolumePolicy action=skip), "+
2618+
"be aware that in cross-cluster restore scenarios this may cause two clusters to share the same "+
2619+
"underlying storage. Consider using Delete reclaim policy for volumes that are skipped during backup.",
2620+
obj.GetName())
26032621

26042622
// Check to see if the claimRef.namespace field needs to be remapped, and do so if necessary.
26052623
if _, err := remapClaimRefNS(ctx, obj); err != nil {

0 commit comments

Comments
 (0)