Skip to content

Commit 1172314

Browse files
committed
fix: progression tracking E2E with minimal upstream changes
1 parent b19e52f commit 1172314

2 files changed

Lines changed: 5 additions & 14 deletions

File tree

pkg/controller/trainjob_controller.go

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -144,26 +144,19 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
144144
err = errors.Join(err, statusErr)
145145
}
146146

147-
// reconcileDeadline may schedule a requeue to check the deadline later. Save the result
148-
// but do NOT return early — RHAI progression tracking must always run.
149147
deadlineResult, deadlineErr := r.reconcileDeadline(ctx, &trainJob)
150148
err = errors.Join(err, deadlineErr)
151149

152-
// TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support
153-
// for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183
154150
if !equality.Semantic.DeepEqual(&trainJob.Status, prevTrainJob.Status) {
151+
// TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support
152+
// for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183
155153
if statusErr := r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob)); statusErr != nil {
156154
return ctrl.Result{}, errors.Join(err, statusErr)
157155
}
158156
}
159157

160-
// RHAI progression tracking (fetches fresh state from API server, logs errors without joining)
161-
result, progressionErr := progression.ReconcileProgression(ctx, r.client, r.apiReader, log, &trainJob)
162-
if progressionErr != nil {
163-
log.V(1).Info("Progression tracking encountered an error (will retry on next reconcile)", "error", progressionErr)
164-
}
165-
166-
// Use the deadline requeue if it is sooner than the progression requeue.
158+
// RHAI progression tracking
159+
result, _ := progression.ReconcileProgression(ctx, r.client, r.apiReader, log, &trainJob)
167160
if deadlineResult.RequeueAfter > 0 && (result.RequeueAfter == 0 || deadlineResult.RequeueAfter < result.RequeueAfter) {
168161
return deadlineResult, err
169162
}

pkg/rhai/progression/progression.go

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -721,9 +721,7 @@ func ReconcileProgression(ctx context.Context, c client.Client, reader client.Re
721721
return ctrl.Result{}, nil
722722
}
723723

724-
// Re-fetch the TrainJob from the API server to ensure we have the latest state,
725-
// including any status updates committed by the upstream reconcile, before
726-
// patching annotations.
724+
// Re-fetch from API server to get latest status before patching annotations
727725
if err := reader.Get(ctx, client.ObjectKeyFromObject(trainJob), trainJob); err != nil {
728726
return ctrl.Result{}, client.IgnoreNotFound(err)
729727
}

0 commit comments

Comments
 (0)