Skip to content

Commit 1719fdd

Browse files
mprycclaude
andcommitted
Fix OADP-7562: handle wildcard in scoped excluded cluster resources
When a user sets excludedClusterScopedResources or excludedNamespaceScopedResources to '*', skip appending default resources to avoid Velero FailedValidation. add VolumeGroupSnapshotLabelKey to enforced backup spec tests to allow tests passing. fix lint issues. Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Michal Pryc <mpryc@redhat.com>
1 parent 31a3bfc commit 1719fdd

6 files changed

Lines changed: 114 additions & 55 deletions

File tree

internal/common/constant/constant.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ const JSONTagString = "json"
9393
// CommaString defines a constant for the comma string
9494
const CommaString = ","
9595

96+
// WildcardString defines the wildcard character used in Velero resource filters
97+
const WildcardString = "*"
98+
9699
// MaximumNacObjectNameLength represents Generated Non Admin Object Name and
97100
// must be below 63 characters, because it's used within object Label Value
98101
const MaximumNacObjectNameLength = validation.DNS1123LabelMaxLength

internal/common/function/function_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ func TestValidateBackupSpecEnforcedFields(t *testing.T) {
378378
ParallelFilesUpload: 32, //nolint:revive // just test
379379
},
380380
},
381+
{
382+
name: "VolumeGroupSnapshotLabelKey",
383+
enforcedValue: "enforced-label-key",
384+
overrideValue: "user-label-key",
385+
},
381386
}
382387
for _, test := range tests {
383388
t.Run(test.name, func(t *testing.T) {

internal/controller/nonadminbackup_controller.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"context"
2222
"errors"
2323
"reflect"
24+
"slices"
2425

2526
"github.com/go-logr/logr"
2627
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
@@ -705,11 +706,18 @@ func (r *NonAdminBackupReconciler) createVeleroBackupAndSyncWithNonAdminBackup(c
705706

706707
if haveNewResourceFilterParameters {
707708
// Use the new-style exclusion list
708-
backupSpec.ExcludedNamespaceScopedResources = append(backupSpec.ExcludedNamespaceScopedResources,
709-
alwaysExcludedNamespacedResources...)
710-
backupSpec.ExcludedClusterScopedResources = append(backupSpec.ExcludedClusterScopedResources,
711-
alwaysExcludedClusterResources...)
712-
} else {
709+
// Skip appending when wildcard '*' is already present, as it already
710+
// excludes everything and mixing '*' with specific items causes Velero
711+
// FailedValidation.
712+
if !slices.Contains(backupSpec.ExcludedNamespaceScopedResources, constant.WildcardString) {
713+
backupSpec.ExcludedNamespaceScopedResources = append(backupSpec.ExcludedNamespaceScopedResources,
714+
alwaysExcludedNamespacedResources...)
715+
}
716+
if !slices.Contains(backupSpec.ExcludedClusterScopedResources, constant.WildcardString) {
717+
backupSpec.ExcludedClusterScopedResources = append(backupSpec.ExcludedClusterScopedResources,
718+
alwaysExcludedClusterResources...)
719+
}
720+
} else if !slices.Contains(backupSpec.ExcludedResources, constant.WildcardString) {
713721
// Fallback to the old-style exclusion list
714722
backupSpec.ExcludedResources = append(backupSpec.ExcludedResources,
715723
alwaysExcludedNamespacedResources...)

internal/controller/nonadminbackup_controller_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller",
15461546
},
15471547
addSyncLabel: true,
15481548
}),
1549+
ginkgo.Entry("Should not append default excluded resources when wildcard is used in excludedClusterScopedResources", nonAdminBackupFullReconcileScenario{
1550+
spec: nacv1alpha1.NonAdminBackupSpec{
1551+
BackupSpec: &velerov1.BackupSpec{
1552+
ExcludedClusterScopedResources: []string{"*"},
1553+
IncludedNamespaceScopedResources: []string{"pvc"},
1554+
},
1555+
},
1556+
status: nacv1alpha1.NonAdminBackupStatus{
1557+
Phase: nacv1alpha1.NonAdminPhaseCreated,
1558+
VeleroBackup: &nacv1alpha1.VeleroBackup{
1559+
Namespace: oadpNamespace,
1560+
Status: nil,
1561+
Spec: &velerov1.BackupSpec{
1562+
ExcludedClusterScopedResources: []string{"*"},
1563+
IncludedNamespaceScopedResources: []string{"pvc"},
1564+
ExcludedNamespaceScopedResources: []string{
1565+
nacv1alpha1.NonAdminBackups,
1566+
nacv1alpha1.NonAdminRestores,
1567+
nacv1alpha1.NonAdminBackupStorageLocations,
1568+
},
1569+
},
1570+
},
1571+
Conditions: []metav1.Condition{
1572+
{
1573+
Type: "Accepted",
1574+
Status: metav1.ConditionTrue,
1575+
Reason: "BackupAccepted",
1576+
Message: "backup accepted",
1577+
},
1578+
{
1579+
Type: "Queued",
1580+
Status: metav1.ConditionTrue,
1581+
Reason: "BackupScheduled",
1582+
Message: "Created Velero Backup object",
1583+
},
1584+
},
1585+
},
1586+
}),
15491587
)
15501588

15511589
ginkgo.DescribeTable("Reconcile triggered by NonAdminBackup sync event",

internal/controller/nonadminbackupstoragelocation_controller.go

Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,37 @@ distributed under the License is distributed on an "AS IS" BASIS,
1212
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
See the License for the specific language governing permissions and
1414
limitations under the License.
15-
16-
RESOURCE CONFLICT RESOLUTION ENHANCEMENTS:
17-
This file has been enhanced to resolve resource conflict issues that occurred when
18-
multiple controllers or processes attempted to update the same NonAdminBackupStorageLocationRequest
19-
objects simultaneously. The following changes were made:
20-
21-
1. RETRY LOGIC FRAMEWORK (updateStatusWithRetry function):
22-
- Uses standard Kubernetes client-go retry.RetryOnConflict with DefaultRetry settings
23-
- Handles "object has been modified" errors gracefully
24-
- Fetches fresh object copies to avoid stale ResourceVersion conflicts
25-
- Leverages proven Kubernetes retry patterns (5 attempts, 10ms+jitter)
26-
27-
2. NIL SAFETY CHECKS (ensureNonAdminRequest function):
28-
- Prevents panic when SourceNonAdminBSL is nil during initialization
29-
- Converts terminal errors to requeue conditions for uninitialized status
30-
- Allows proper status initialization timing in high-concurrency environments
31-
32-
3. OPTIMIZED STATUS UPDATES (createNonAdminRequest function):
33-
- Uses fast-path direct updates for new objects
34-
- Falls back to retry logic only when conflicts are detected
35-
- Preserves computed status values while ensuring conflict resilience
36-
37-
4. TEST ENVIRONMENT ADAPTATIONS:
38-
- Increased timeouts to accommodate retry logic execution time
39-
- Reduced polling frequency to handle Kubernetes client rate limiting
40-
- Added delays to prevent overwhelming API server during test runs
41-
42-
These enhancements ensure that OADP non-admin backup operations complete successfully
43-
even under high concurrency or when multiple reconciliation events occur simultaneously.
4415
*/
4516

17+
// RESOURCE CONFLICT RESOLUTION ENHANCEMENTS:
18+
// This file has been enhanced to resolve resource conflict issues that occurred when
19+
// multiple controllers or processes attempted to update the same NonAdminBackupStorageLocationRequest
20+
// objects simultaneously. The following changes were made:
21+
//
22+
// 1. RETRY LOGIC FRAMEWORK (updateStatusWithRetry function):
23+
// - Uses standard Kubernetes client-go retry.RetryOnConflict with DefaultRetry settings
24+
// - Handles "object has been modified" errors gracefully
25+
// - Fetches fresh object copies to avoid stale ResourceVersion conflicts
26+
// - Leverages proven Kubernetes retry patterns (5 attempts, 10ms+jitter)
27+
//
28+
// 2. NIL SAFETY CHECKS (ensureNonAdminRequest function):
29+
// - Prevents panic when SourceNonAdminBSL is nil during initialization
30+
// - Converts terminal errors to requeue conditions for uninitialized status
31+
// - Allows proper status initialization timing in high-concurrency environments
32+
//
33+
// 3. OPTIMIZED STATUS UPDATES (createNonAdminRequest function):
34+
// - Uses fast-path direct updates for new objects
35+
// - Falls back to retry logic only when conflicts are detected
36+
// - Preserves computed status values while ensuring conflict resilience
37+
//
38+
// 4. TEST ENVIRONMENT ADAPTATIONS:
39+
// - Increased timeouts to accommodate retry logic execution time
40+
// - Reduced polling frequency to handle Kubernetes client rate limiting
41+
// - Added delays to prevent overwhelming API server during test runs
42+
//
43+
// These enhancements ensure that OADP non-admin backup operations complete successfully
44+
// even under high concurrency or when multiple reconciliation events occur simultaneously.
45+
4646
package controller
4747

4848
import (
@@ -125,7 +125,10 @@ func (r *NonAdminBackupStorageLocationReconciler) updateStatusWithRetry(ctx cont
125125
// Get the latest version of the object from the API server to ensure we have
126126
// the most recent ResourceVersion and avoid stale object conflicts
127127
key := client.ObjectKeyFromObject(obj)
128-
fresh := obj.DeepCopyObject().(client.Object)
128+
fresh, ok := obj.DeepCopyObject().(client.Object)
129+
if !ok {
130+
return errors.New("failed to convert deep copy to client.Object")
131+
}
129132
if err := r.Get(ctx, key, fresh); err != nil {
130133
return err // RetryOnConflict will handle conflict vs non-conflict errors
131134
}
@@ -643,7 +646,10 @@ func (r *NonAdminBackupStorageLocationReconciler) createNonAdminRequest(ctx cont
643646
// - Event-driven reconciliation causing concurrent status updates
644647
logger.V(1).Info("NonAdminBackupStorageLocationRequest already exists")
645648
if updateErr := r.updateStatusWithRetry(ctx, logger, nabslRequest, func(obj client.Object) bool {
646-
req := obj.(*nacv1alpha1.NonAdminBackupStorageLocationRequest)
649+
req, ok := obj.(*nacv1alpha1.NonAdminBackupStorageLocationRequest)
650+
if !ok {
651+
return false
652+
}
647653
return updatePhaseIfNeeded(&req.Status.Phase, req.Spec.ApprovalDecision)
648654
}); updateErr != nil {
649655
logger.Error(updateErr, failedUpdateStatusError)
@@ -704,26 +710,22 @@ func (r *NonAdminBackupStorageLocationReconciler) createNonAdminRequest(ctx cont
704710
// - Correctness: Proper status initialization even under load
705711
if updated := updateNonAdminRequestStatus(&nonAdminBslRequest.Status, nabsl, approvalDecision); updated {
706712
if updateErr := r.Status().Update(ctx, &nonAdminBslRequest); updateErr != nil {
707-
if apierrors.IsConflict(updateErr) {
708-
// CONFLICT DETECTED: Another process modified the request between create and status update
709-
// This can happen when:
710-
// - Admin approves/rejects the request immediately after creation
711-
// - Multiple reconcile loops are triggered by related events
712-
// - High concurrency in the test environment
713-
logger.V(1).Info("Conflict on initial status update, retrying with fresh object...")
714-
if retryErr := r.updateStatusWithRetry(ctx, logger, &nonAdminBslRequest, func(obj client.Object) bool {
715-
req := obj.(*nacv1alpha1.NonAdminBackupStorageLocationRequest)
716-
return updateNonAdminRequestStatus(&req.Status, nabsl, approvalDecision)
717-
}); retryErr != nil {
718-
logger.Error(retryErr, failedUpdateStatusError)
719-
return false, retryErr
720-
}
721-
} else {
722-
// NON-CONFLICT ERROR: Validation, permission, or other API server issue
723-
// Don't retry these as they're likely to persist
713+
if !apierrors.IsConflict(updateErr) {
724714
logger.Error(updateErr, failedUpdateStatusError)
725715
return false, updateErr
726716
}
717+
// CONFLICT DETECTED: Another process modified the request between create and status update
718+
logger.V(1).Info("Conflict on initial status update, retrying with fresh object...")
719+
if retryErr := r.updateStatusWithRetry(ctx, logger, &nonAdminBslRequest, func(obj client.Object) bool {
720+
req, ok := obj.(*nacv1alpha1.NonAdminBackupStorageLocationRequest)
721+
if !ok {
722+
return false
723+
}
724+
return updateNonAdminRequestStatus(&req.Status, nabsl, approvalDecision)
725+
}); retryErr != nil {
726+
logger.Error(retryErr, failedUpdateStatusError)
727+
return false, retryErr
728+
}
727729
}
728730
}
729731

internal/controller/nonadminrestore_controller.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"errors"
2222
"reflect"
23+
"slices"
2324

2425
"github.com/go-logr/logr"
2526
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
@@ -348,8 +349,10 @@ func (r *NonAdminRestoreReconciler) createVeleroRestore(ctx context.Context, log
348349
}
349350
}
350351

351-
restoreSpec.ExcludedResources = append(restoreSpec.ExcludedResources,
352-
"volumesnapshotclasses")
352+
if !slices.Contains(restoreSpec.ExcludedResources, constant.WildcardString) {
353+
restoreSpec.ExcludedResources = append(restoreSpec.ExcludedResources,
354+
"volumesnapshotclasses")
355+
}
353356

354357
veleroRestore = &velerov1.Restore{
355358
ObjectMeta: metav1.ObjectMeta{

0 commit comments

Comments
 (0)