From 9d92079d7545bc1e09e504f433224d898255dadb Mon Sep 17 00:00:00 2001 From: Rob Bell Date: Fri, 24 Apr 2026 08:36:51 +0100 Subject: [PATCH 1/2] chore(api): Remove duplicate TrainJob status patch Signed-off-by: Rob Bell --- pkg/controller/trainjob_controller.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/controller/trainjob_controller.go b/pkg/controller/trainjob_controller.go index 4c5e3529a5..6395b11b27 100644 --- a/pkg/controller/trainjob_controller.go +++ b/pkg/controller/trainjob_controller.go @@ -140,17 +140,16 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c err = errors.Join(err, statusErr) } - if deadlineResult, deadlineErr := r.reconcileDeadline(ctx, &trainJob); deadlineErr != nil || deadlineResult.RequeueAfter > 0 { - if !equality.Semantic.DeepEqual(&trainJob.Status, &prevTrainJob.Status) { - return deadlineResult, errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob))) - } - return deadlineResult, errors.Join(err, deadlineErr) - } + deadlineResult := r.reconcileDeadline(ctx, &trainJob) if !equality.Semantic.DeepEqual(&trainJob.Status, prevTrainJob.Status) { // TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support // for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183 - return ctrl.Result{}, errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob))) + err = errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob))) + } + + if deadlineResult.RequeueAfter > 0 { + return deadlineResult, err } return ctrl.Result{}, err } @@ -168,9 +167,9 @@ func (r *TrainJobReconciler) reconcileObjects(ctx context.Context, runtime jobru return nil } -func (r *TrainJobReconciler) reconcileDeadline(ctx context.Context, trainJob *trainer.TrainJob) (ctrl.Result, error) { +func (r *TrainJobReconciler) reconcileDeadline(ctx context.Context, trainJob *trainer.TrainJob) ctrl.Result { if trainJob.Spec.ActiveDeadlineSeconds == 0 || trainjob.IsTrainJobFinished(trainJob) || ptr.Deref(trainJob.Spec.Suspend, false) { - return ctrl.Result{}, nil + return ctrl.Result{} } startTime := trainJob.CreationTimestamp.Time suspendedCond := meta.FindStatusCondition(trainJob.Status.Conditions, trainer.TrainJobSuspended) @@ -178,7 +177,7 @@ func (r *TrainJobReconciler) reconcileDeadline(ctx context.Context, trainJob *tr startTime = suspendedCond.LastTransitionTime.Time } if startTime.IsZero() { - return ctrl.Result{}, nil + return ctrl.Result{} } deadline := startTime.Add(time.Duration(trainJob.Spec.ActiveDeadlineSeconds) * time.Second) now := time.Now() @@ -194,7 +193,7 @@ func (r *TrainJobReconciler) reconcileDeadline(ctx context.Context, trainJob *tr if err := client.IgnoreNotFound(r.client.Delete(ctx, jobSet)); err != nil { ctrl.LoggerFrom(ctx).V(2).Info("Failed to delete JobSet after deadline exceeded", "error", err) } - return ctrl.Result{}, nil + return ctrl.Result{} } requeueAfter := time.Until(deadline) if requeueAfter <= 0 { @@ -203,7 +202,7 @@ func (r *TrainJobReconciler) reconcileDeadline(ctx context.Context, trainJob *tr ctrl.LoggerFrom(ctx).V(2).Info("Scheduling deadline check", "activeDeadlineSeconds", trainJob.Spec.ActiveDeadlineSeconds, "requeueAfter", requeueAfter) - return ctrl.Result{RequeueAfter: requeueAfter}, nil + return ctrl.Result{RequeueAfter: requeueAfter} } func (r *TrainJobReconciler) Create(e event.TypedCreateEvent[*trainer.TrainJob]) bool { From 64bfc423356475accb62e7416508093643a65e95 Mon Sep 17 00:00:00 2001 From: Rob Bell Date: Mon, 27 Apr 2026 09:00:02 +0100 Subject: [PATCH 2/2] fix: compare status correctly Signed-off-by: Rob Bell --- pkg/controller/trainjob_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/trainjob_controller.go b/pkg/controller/trainjob_controller.go index 6395b11b27..8f4d9e6d4a 100644 --- a/pkg/controller/trainjob_controller.go +++ b/pkg/controller/trainjob_controller.go @@ -142,7 +142,7 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c deadlineResult := r.reconcileDeadline(ctx, &trainJob) - if !equality.Semantic.DeepEqual(&trainJob.Status, prevTrainJob.Status) { + if !equality.Semantic.DeepEqual(trainJob.Status, prevTrainJob.Status) { // TODO(astefanutti): Consider using SSA once controller-runtime client has SSA support // for sub-resources. See: https://github.com/kubernetes-sigs/controller-runtime/issues/3183 err = errors.Join(err, r.client.Status().Patch(ctx, &trainJob, client.MergeFrom(prevTrainJob)))