@@ -149,8 +149,6 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
149149 deadlineResult , deadlineErr := r .reconcileDeadline (ctx , & trainJob )
150150 err = errors .Join (err , deadlineErr )
151151
152- // Commit upstream status first before RHAI runs, so ReconcileProgression
153- // re-fetches the latest committed state from the API server.
154152 // TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support
155153 // for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183
156154 if ! equality .Semantic .DeepEqual (& trainJob .Status , prevTrainJob .Status ) {
@@ -159,13 +157,8 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
159157 }
160158 }
161159
162- // RHAI progression tracking runs after upstream status is committed.
163- // ReconcileProgression re-fetches the TrainJob from the API server to get the
164- // latest committed state before patching annotations.
160+ // RHAI progression tracking: ReconcileProgression fetches fresh state from the API server; its errors are logged, not joined with the upstream reconcile errors.
165161 result , progressionErr := progression .ReconcileProgression (ctx , r .client , r .apiReader , log , & trainJob )
166- // Don't join progression errors with upstream errors - progression errors during pod startup
167- // are expected (pod not ready, no IP yet) and shouldn't block requeueing.
168- // If progression error exists, log it but don't prevent the requeue.
169162 if progressionErr != nil {
170163 log .V (1 ).Info ("Progression tracking encountered an error (will retry on next reconcile)" , "error" , progressionErr )
171164 }
0 commit comments