@@ -113,10 +113,10 @@ func (s *Scheduler[K]) Schedule(ctx context.Context, key K, d Details) {
113
113
}
114
114
next := d .Trigger .Next (now )
115
115
116
- s .scheduleLocked (ctx , key , next , 0 , nil , d .Window )
116
+ s .scheduleLocked (ctx , key , next , time. Time {}, 0 , nil , d .Window )
117
117
}
118
118
119
- func (s * Scheduler [K ]) reschedule (ctx * RunContext [K ]) {
119
+ func (s * Scheduler [K ]) reschedule (ctx * RunContext [K ], initialActivation time. Time ) {
120
120
key := ctx .Key
121
121
122
122
s .mu .Lock ()
@@ -143,29 +143,62 @@ func (s *Scheduler[K]) reschedule(ctx *RunContext[K]) {
143
143
next := d .Trigger .Next (now )
144
144
145
145
var (
146
- retno int8
147
- p Properties
146
+ retno int8
147
+ p Properties
148
+ preRescheduleActivation time.Time
148
149
)
149
150
switch {
150
151
case shouldContinue (ctx ):
151
152
next = now
152
153
retno = ctx .Retry
153
154
p = ctx .Properties
155
+ preRescheduleActivation = initialActivation
154
156
case shouldRetry (ctx , ctx .err ):
155
157
if d .Backoff != nil {
156
158
if b := d .Backoff .NextBackOff (); b != retry .Stop {
157
159
next = now .Add (b )
158
160
retno = ctx .Retry + 1
159
161
p = ctx .Properties
160
162
s .listener .OnRetryBackoff (ctx , key , b , retno )
163
+ preRescheduleActivation = initialActivation
161
164
}
162
165
}
163
166
default :
167
+ // Use initial activation time instead of now so that we don't skip
168
+ // activations of runs which ran longer than the cron interval (#4309).
169
+ //
170
+ // Reschedule after long run:
171
+ // Cron activation: |-A-------A-------A------ ...
172
+ // Task execution: |-[EEEEEEEE][EEE]-[EEE]-- ...
173
+ //
174
+ // Reschedule after retries:
175
+ // Cron activation: |-A-------A-------A----- ...
176
+ // Task execution/retry: |-[E]-[R]-[R][EE]-[EE]-- ...
177
+ //
178
+ // Reschedule after maintenance window:
179
+ // Cron activation: |-A-------A-------A----- ...
180
+ // Maintenance window: |[WW]-[W]---[WWWWWWWWWWW ...
181
+ // Task execution/continue: |-[E]-[C]---[C][E][E]--- ...
182
+ //
183
+ // Note that initial activation run is not calculated for runs interrupted
184
+ // by pause/start and suspend/resume, as they are treated as fresh runs by
185
+ // the scheduler.
186
+ //
187
+ // In general, if task execution takes more time than the cron interval,
188
+ // then the problem is on the cron definition side, but we should still
189
+ // try to alleviate this issue for "spontaneous" long task executions.
190
+ //
191
+ // The +1 (one nanosecond) should ensure that next is strictly after the activation time.
192
+ // In case this assertion fails, fall back to the previous scheduling
193
+ // mechanism which uses now for calculating next activation time.
194
+ if a := d .Trigger .Next (initialActivation .Add (1 )); a .After (initialActivation ) {
195
+ next = a
196
+ }
164
197
if d .Backoff != nil {
165
198
d .Backoff .Reset ()
166
199
}
167
200
}
168
- s .scheduleLocked (ctx , key , next , retno , p , d .Window )
201
+ s .scheduleLocked (ctx , key , next , preRescheduleActivation , retno , p , d .Window )
169
202
}
170
203
171
204
func shouldContinue (ctx context.Context ) bool {
@@ -176,7 +209,7 @@ func shouldRetry(ctx context.Context, err error) bool {
176
209
return ! (err == nil || errors .Is (context .Cause (ctx ), ErrStoppedTask ) || retry .IsPermanent (err ))
177
210
}
178
211
179
- func (s * Scheduler [K ]) scheduleLocked (ctx context.Context , key K , next time.Time , retno int8 , p Properties , w Window ) {
212
+ func (s * Scheduler [K ]) scheduleLocked (ctx context.Context , key K , next , preRescheduleActivation time.Time , retno int8 , p Properties , w Window ) {
180
213
if next .IsZero () {
181
214
s .listener .OnNoTrigger (ctx , key )
182
215
s .unscheduleLocked (key )
@@ -186,7 +219,14 @@ func (s *Scheduler[K]) scheduleLocked(ctx context.Context, key K, next time.Time
186
219
begin , end := w .Next (next )
187
220
188
221
s .listener .OnSchedule (ctx , key , begin , end , retno )
189
- a := Activation [K ]{Key : key , Time : begin , Retry : retno , Properties : p , Stop : end }
222
+ a := Activation [K ]{
223
+ Time : begin ,
224
+ Key : key ,
225
+ Retry : retno ,
226
+ Properties : p ,
227
+ Stop : end ,
228
+ preRescheduleActivation : preRescheduleActivation ,
229
+ }
190
230
if s .queue .Push (a ) {
191
231
s .wakeup ()
192
232
}
@@ -230,7 +270,7 @@ func (s *Scheduler[K]) Trigger(ctx context.Context, key K) bool {
230
270
231
271
s .listener .OnTrigger (ctx , key , ok )
232
272
if ok {
233
- s .asyncRun (runCtx )
273
+ s .asyncRun (runCtx , s . now () )
234
274
}
235
275
return ok
236
276
}
@@ -326,7 +366,7 @@ func (s *Scheduler[_]) Start(ctx context.Context) {
326
366
runCtx := s .newRunContextLocked (a )
327
367
s .mu .Unlock ()
328
368
329
- s .asyncRun (runCtx )
369
+ s .asyncRun (runCtx , InitialActivation ( a ) )
330
370
}
331
371
332
372
s .listener .OnSchedulerStop (ctx )
@@ -393,14 +433,14 @@ func (s *Scheduler[K]) newRunContextLocked(a Activation[K]) *RunContext[K] {
393
433
return ctx
394
434
}
395
435
396
- func (s * Scheduler [K ]) asyncRun (ctx * RunContext [K ]) {
436
+ func (s * Scheduler [K ]) asyncRun (ctx * RunContext [K ], initialActivation time. Time ) {
397
437
s .listener .OnRunStart (ctx )
398
438
s .wg .Add (1 )
399
439
go func (ctx * RunContext [K ]) {
400
440
defer s .wg .Done ()
401
441
ctx .err = s .run (* ctx )
402
442
s .onRunEnd (ctx )
403
- s .reschedule (ctx )
443
+ s .reschedule (ctx , initialActivation )
404
444
}(ctx )
405
445
}
406
446
0 commit comments