Skip to content

Commit 9f923fd

Browse files
authored
Merge pull request #1 from hrathina/fix/rhoaieng-59039-rhai-e2e-v3
Fix/rhoaieng 59039 rhai e2e v3
2 parents c1c24fd + be12baf commit 9f923fd

2 files changed

Lines changed: 16 additions & 10 deletions

File tree

pkg/controller/trainjob_controller.go

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -144,22 +144,23 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
144144
err = errors.Join(err, statusErr)
145145
}
146146

147-
if deadlineResult, deadlineErr := r.reconcileDeadline(ctx, &trainJob); deadlineErr != nil || deadlineResult.RequeueAfter > 0 {
148-
if !equality.Semantic.DeepEqual(&trainJob.Status, &prevTrainJob.Status) {
149-
return deadlineResult, errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob)))
150-
}
151-
return deadlineResult, errors.Join(err, deadlineErr)
152-
}
147+
deadlineResult, deadlineErr := r.reconcileDeadline(ctx, &trainJob)
148+
err = errors.Join(err, deadlineErr)
153149

154150
if !equality.Semantic.DeepEqual(&trainJob.Status, prevTrainJob.Status) {
155151
// TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support
156152
// for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183
157-
return ctrl.Result{}, errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob)))
153+
if statusErr := r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob)); statusErr != nil {
154+
return ctrl.Result{}, errors.Join(err, statusErr)
155+
}
158156
}
159157

160-
// RHAI progression tracking (use APIReader to avoid pod watches)
161-
result, progressionErr := progression.ReconcileProgression(ctx, r.client, r.apiReader, log, &trainJob)
162-
return result, errors.Join(err, progressionErr)
158+
// RHAI progression tracking
159+
result, _ := progression.ReconcileProgression(ctx, r.client, r.apiReader, log, &trainJob)
160+
if deadlineResult.RequeueAfter > 0 && (result.RequeueAfter == 0 || deadlineResult.RequeueAfter < result.RequeueAfter) {
161+
return deadlineResult, err
162+
}
163+
return result, err
163164
}
164165

165166
func (r *TrainJobReconciler) reconcileObjects(ctx context.Context, runtime jobruntimes.Runtime, trainJob *trainer.TrainJob) error {

pkg/rhai/progression/progression.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -721,6 +721,11 @@ func ReconcileProgression(ctx context.Context, c client.Client, reader client.Re
721721
return ctrl.Result{}, nil
722722
}
723723

724+
// Re-fetch from API server to get latest status before patching annotations
725+
if err := reader.Get(ctx, client.ObjectKeyFromObject(trainJob), trainJob); err != nil {
726+
return ctrl.Result{}, client.IgnoreNotFound(err)
727+
}
728+
724729
isRunning := !meta.IsStatusConditionTrue(trainJob.Status.Conditions, trainer.TrainJobSuspended) &&
725730
!meta.IsStatusConditionTrue(trainJob.Status.Conditions, trainer.TrainJobComplete) &&
726731
!meta.IsStatusConditionTrue(trainJob.Status.Conditions, trainer.TrainJobFailed)

0 commit comments

Comments
 (0)