@@ -12,42 +12,43 @@ distributed under the License is distributed on an "AS IS" BASIS,
1212WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313See the License for the specific language governing permissions and
1414limitations under the License.
15-
16- RESOURCE CONFLICT RESOLUTION ENHANCEMENTS:
17- This file has been enhanced to resolve resource conflict issues that occurred when
18- multiple controllers or processes attempted to update the same NonAdminBackupStorageLocationRequest
19- objects simultaneously. The following changes were made:
20-
21- 1. RETRY LOGIC FRAMEWORK (updateStatusWithRetry function):
22- - Uses standard Kubernetes client-go retry.RetryOnConflict with DefaultRetry settings
23- - Handles "object has been modified" errors gracefully
24- - Fetches fresh object copies to avoid stale ResourceVersion conflicts
25- - Leverages proven Kubernetes retry patterns (5 attempts, 10ms+jitter)
26-
27- 2. NIL SAFETY CHECKS (ensureNonAdminRequest function):
28- - Prevents panic when SourceNonAdminBSL is nil during initialization
29- - Converts terminal errors to requeue conditions for uninitialized status
30- - Allows proper status initialization timing in high-concurrency environments
31-
32- 3. OPTIMIZED STATUS UPDATES (createNonAdminRequest function):
33- - Uses fast-path direct updates for new objects
34- - Falls back to retry logic only when conflicts are detected
35- - Preserves computed status values while ensuring conflict resilience
36-
37- 4. TEST ENVIRONMENT ADAPTATIONS:
38- - Increased timeouts to accommodate retry logic execution time
39- - Reduced polling frequency to handle Kubernetes client rate limiting
40- - Added delays to prevent overwhelming API server during test runs
41-
42- These enhancements ensure that OADP non-admin backup operations complete successfully
43- even under high concurrency or when multiple reconciliation events occur simultaneously.
4415*/
4516
17+ // RESOURCE CONFLICT RESOLUTION ENHANCEMENTS:
18+ // This file has been enhanced to resolve resource conflict issues that occurred when
19+ // multiple controllers or processes attempted to update the same NonAdminBackupStorageLocationRequest
20+ // objects simultaneously. The following changes were made:
21+ //
22+ // 1. RETRY LOGIC FRAMEWORK (updateStatusWithRetry function):
23+ // - Uses standard Kubernetes client-go retry.RetryOnConflict with DefaultRetry settings
24+ // - Handles "object has been modified" errors gracefully
25+ // - Fetches fresh object copies to avoid stale ResourceVersion conflicts
26+ // - Leverages proven Kubernetes retry patterns (up to 5 attempts, 10ms base delay plus jitter)
27+ //
28+ // 2. NIL SAFETY CHECKS (ensureNonAdminRequest function):
29+ // - Prevents panic when SourceNonAdminBSL is nil during initialization
30+ // - Converts terminal errors to requeue conditions for uninitialized status
31+ // - Allows proper status initialization timing in high-concurrency environments
32+ //
33+ // 3. OPTIMIZED STATUS UPDATES (createNonAdminRequest function):
34+ // - Uses fast-path direct updates for new objects
35+ // - Falls back to retry logic only when conflicts are detected
36+ // - Preserves computed status values while ensuring conflict resilience
37+ //
38+ // 4. TEST ENVIRONMENT ADAPTATIONS:
39+ // - Increased timeouts to accommodate retry logic execution time
40+ // - Reduced polling frequency to handle Kubernetes client rate limiting
41+ // - Added delays to prevent overwhelming API server during test runs
42+ //
43+ // These enhancements ensure that OADP non-admin backup operations complete successfully
44+ // even under high concurrency or when multiple reconciliation events occur simultaneously.
45+
4646package controller
4747
4848import (
4949 "context"
5050 "errors"
51+ "fmt"
5152 "reflect"
5253 "time"
5354
@@ -125,7 +126,10 @@ func (r *NonAdminBackupStorageLocationReconciler) updateStatusWithRetry(ctx cont
125126 // Get the latest version of the object from the API server to ensure we have
126127 // the most recent ResourceVersion and avoid stale object conflicts
127128 key := client .ObjectKeyFromObject (obj )
128- fresh := obj .DeepCopyObject ().(client.Object )
129+ fresh , ok := obj .DeepCopyObject ().(client.Object )
130+ if ! ok {
131+ return errors .New ("failed to convert deep copy to client.Object" )
132+ }
129133 if err := r .Get (ctx , key , fresh ); err != nil {
130134 return err // RetryOnConflict will handle conflict vs non-conflict errors
131135 }
@@ -643,7 +647,11 @@ func (r *NonAdminBackupStorageLocationReconciler) createNonAdminRequest(ctx cont
643647 // - Event-driven reconciliation causing concurrent status updates
644648 logger .V (1 ).Info ("NonAdminBackupStorageLocationRequest already exists" )
645649 if updateErr := r .updateStatusWithRetry (ctx , logger , nabslRequest , func (obj client.Object ) bool {
646- req := obj .(* nacv1alpha1.NonAdminBackupStorageLocationRequest )
650+ req , ok := obj .(* nacv1alpha1.NonAdminBackupStorageLocationRequest )
651+ if ! ok {
652+ logger .Error (fmt .Errorf ("expected *NonAdminBackupStorageLocationRequest, got %T" , obj ), "Unexpected type assertion failure" )
653+ return false
654+ }
647655 return updatePhaseIfNeeded (& req .Status .Phase , req .Spec .ApprovalDecision )
648656 }); updateErr != nil {
649657 logger .Error (updateErr , failedUpdateStatusError )
@@ -704,26 +712,22 @@ func (r *NonAdminBackupStorageLocationReconciler) createNonAdminRequest(ctx cont
704712 // - Correctness: Proper status initialization even under load
705713 if updated := updateNonAdminRequestStatus (& nonAdminBslRequest .Status , nabsl , approvalDecision ); updated {
706714 if updateErr := r .Status ().Update (ctx , & nonAdminBslRequest ); updateErr != nil {
707- if apierrors .IsConflict (updateErr ) {
708- // CONFLICT DETECTED: Another process modified the request between create and status update
709- // This can happen when:
710- // - Admin approves/rejects the request immediately after creation
711- // - Multiple reconcile loops are triggered by related events
712- // - High concurrency in the test environment
713- logger .V (1 ).Info ("Conflict on initial status update, retrying with fresh object..." )
714- if retryErr := r .updateStatusWithRetry (ctx , logger , & nonAdminBslRequest , func (obj client.Object ) bool {
715- req := obj .(* nacv1alpha1.NonAdminBackupStorageLocationRequest )
716- return updateNonAdminRequestStatus (& req .Status , nabsl , approvalDecision )
717- }); retryErr != nil {
718- logger .Error (retryErr , failedUpdateStatusError )
719- return false , retryErr
720- }
721- } else {
722- // NON-CONFLICT ERROR: Validation, permission, or other API server issue
723- // Don't retry these as they're likely to persist
715+ if ! apierrors .IsConflict (updateErr ) {
724716 logger .Error (updateErr , failedUpdateStatusError )
725717 return false , updateErr
726718 }
719+ // CONFLICT DETECTED: Another process modified the request between create and status update
720+ logger .V (1 ).Info ("Conflict on initial status update, retrying with fresh object..." )
721+ if retryErr := r .updateStatusWithRetry (ctx , logger , & nonAdminBslRequest , func (obj client.Object ) bool {
722+ req , ok := obj .(* nacv1alpha1.NonAdminBackupStorageLocationRequest )
723+ if ! ok {
724+ return false
725+ }
726+ return updateNonAdminRequestStatus (& req .Status , nabsl , approvalDecision )
727+ }); retryErr != nil {
728+ logger .Error (retryErr , failedUpdateStatusError )
729+ return false , retryErr
730+ }
727731 }
728732 }
729733
0 commit comments