Skip to content

Commit 5105d92

Browse files
SudipSinha and claude committed
Update generation annotation in handleResume to prevent spurious re-runs
When a job is suspended and then resumed, handleResume now records the current spec generation in the LastScheduledGenerationAnnotation before creating the pod. Without this, editing the spec while a job is suspended and then resuming it would leave a stale annotation, causing Reconcile to detect a false generation change and trigger an extra re-run after the resumed job completes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 02759ab commit 5105d92

1 file changed

Lines changed: 21 additions & 0 deletions

File tree

controllers/lmes/lmevaljob_controller.go

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,13 @@ func (r *LMEvalJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
194194
// re-runs with the updated configuration.
195195
if job.Status.State == lmesv1alpha1.CompleteJobState {
196196
if lastGen := getLastScheduledGeneration(job); lastGen > 0 && job.Generation > lastGen {
197+
// Delete the completed pod first. The replacement pod reuses the same
198+
// name (job.Name), so leaving the old one would cause handleNewCR to
199+
// fail with AlreadyExists when it tries to create it.
200+
if err := r.deleteJobPod(ctx, job); err != nil && client.IgnoreNotFound(err) != nil {
201+
log.Error(err, "failed to delete completed pod before re-run")
202+
return ctrl.Result{}, err
203+
}
197204
log.Info("spec changed for completed job, resetting for re-run",
198205
"name", job.Name,
199206
"previousGeneration", lastGen,
@@ -832,6 +839,20 @@ func (r *LMEvalJobReconciler) handleResume(ctx context.Context, log logr.Logger,
832839
}
833840
}
834841

842+
currentGenStr := strconv.FormatInt(job.Generation, 10)
843+
annotations := job.GetAnnotations()
844+
if annotations == nil {
845+
annotations = make(map[string]string)
846+
}
847+
if annotations[LastScheduledGenerationAnnotation] != currentGenStr {
848+
annotations[LastScheduledGenerationAnnotation] = currentGenStr
849+
job.SetAnnotations(annotations)
850+
if err := r.Update(ctx, job); err != nil {
851+
log.Error(err, "failed to update generation annotation on resume")
852+
return ctrl.Result{}, err
853+
}
854+
}
855+
835856
pod := CreatePod(Options, job, permConfig, caBundle, caBundleKey, log)
836857
if createErr := r.Create(ctx, pod); createErr != nil {
837858
log.Error(createErr, "failed to create pod to resume job")

0 commit comments

Comments
 (0)