@@ -46,23 +46,17 @@ func (lw *LoopsWatch) WithOne(opts *types.LoopOneOptions) error {
46
46
return err
47
47
}
48
48
49
- startToken , err := opts . TokenHandler . GetStartWatchToken ( context . Background () )
49
+ watchOpt , err := lw . updateStartTokenInfo ( & opts . LoopOptions )
50
50
if err != nil {
51
- blog .Errorf ("%s job, run loop watch %s, but get start token failed, err: %v" , opts .Name , lw .streamWatch .DBName ,
52
- err )
53
51
return err
54
52
}
55
-
56
- // update the start token.
57
- if len (startToken ) != 0 {
58
- opts .WatchOpt .StartAfterToken = & types.EventToken {Data : startToken }
59
- }
53
+ watchOpt .WatchFatalErrorCallback = opts .TokenHandler .ResetWatchToken
60
54
61
55
var cancel func ()
62
56
var cancelCtx context.Context
63
57
cancelCtx , cancel = context .WithCancel (context .Background ())
64
58
65
- watcher , err := lw .streamWatch .Watch (cancelCtx , opts . WatchOpt )
59
+ watcher , err := lw .streamWatch .Watch (cancelCtx , watchOpt )
66
60
if err != nil {
67
61
blog .Errorf ("%s job, run loop, but watch failed, err: %v" , opts .Name , err )
68
62
cancel ()
@@ -88,30 +82,43 @@ func (lw *LoopsWatch) WithOne(opts *types.LoopOneOptions) error {
88
82
return nil
89
83
}
90
84
85
// updateStartTokenInfo fetches the start watch token via opts.TokenHandler and
// applies it to opts.WatchOpt before a watch is (re)started.
//
// A non-empty token is set as StartAfterToken and a non-nil StartAtTime is
// copied over; fields the handler did not supply keep their previous values.
// On failure the error is logged here and returned with a nil options pointer.
//
// The returned pointer is opts.WatchOpt itself (updated in place, not a copy),
// so changes the caller makes to the result are also visible through opts.
+ func (lw * LoopsWatch ) updateStartTokenInfo (opts * types.LoopOptions ) (* types.WatchOptions , error ) {
86
// NOTE(review): the lookup runs with context.Background(), so it is not tied
// to any caller-side cancellation.
+ startToken , err := opts .TokenHandler .GetStartWatchToken (context .Background ())
87
+ if err != nil {
88
+ blog .Errorf ("%s job, loop watch db %s, but get start watch token failed, err: %v" , opts .Name ,
89
+ lw .streamWatch .DBName , err )
90
+ return nil , err
91
+ }
92
+
93
+ // update the start token.
94
+ if len (startToken .Token ) != 0 {
95
+ opts .WatchOpt .StartAfterToken = & types.EventToken {Data : startToken .Token }
96
+ }
97
+ if startToken .StartAtTime != nil {
98
+ opts .WatchOpt .StartAtTime = startToken .StartAtTime
99
+ }
100
+
101
+ return opts .WatchOpt , nil
102
+ }
103
+
91
104
// WithBatch allows users to watch events with batch.
92
105
func (lw * LoopsWatch ) WithBatch (opts * types.LoopBatchOptions ) error {
93
106
if err := opts .Validate (); err != nil {
94
107
blog .Errorf ("run loop watch batch, but option is invalid, err: %v" , err )
95
108
return err
96
109
}
97
110
98
- startToken , err := opts . TokenHandler . GetStartWatchToken ( context . Background () )
111
+ watchOpt , err := lw . updateStartTokenInfo ( & opts . LoopOptions )
99
112
if err != nil {
100
- blog .Errorf ("%s job, run loop watch batch %s, but get start token failed, err: %v" , opts .Name ,
101
- lw .streamWatch .DBName , err )
102
113
return err
103
114
}
104
-
105
- // update the start token.
106
- if len (startToken ) != 0 {
107
- opts .WatchOpt .StartAfterToken = & types.EventToken {Data : startToken }
108
- }
115
+ watchOpt .WatchFatalErrorCallback = opts .TokenHandler .ResetWatchToken
109
116
110
117
var cancel func ()
111
118
var cancelCtx context.Context
112
119
cancelCtx , cancel = context .WithCancel (context .Background ())
113
120
114
- watcher , err := lw .streamWatch .Watch (cancelCtx , opts . WatchOpt )
121
+ watcher , err := lw .streamWatch .Watch (cancelCtx , watchOpt )
115
122
if err != nil {
116
123
blog .Errorf ("%s job, run loop, but watch failed, err: %v" , opts .Name , err )
117
124
cancel ()
@@ -167,23 +174,16 @@ func (lw *LoopsWatch) watchRetry(cancel context.CancelFunc,
167
174
cancel ()
168
175
169
176
// use the last token to resume so that we can start again from where we stopped.
170
- lastToken , err := opts . TokenHandler . GetStartWatchToken ( ctx )
177
+ watchOpt , err := lw . updateStartTokenInfo ( opts )
171
178
if err != nil {
172
- blog .Errorf ("%s job, run loop watch, but get last event token failed, err: %v" , opts .Name , err )
173
179
// notify retry signal, exit loop
174
180
close (retrySignal )
175
181
continue
176
182
}
183
+ opts .WatchOpt = watchOpt
177
184
178
- blog .Errorf ("%s job, the former watch loop: %s failed, start retry again from token: %s." , opts .Name ,
179
- lw .streamWatch .DBName , lastToken )
180
-
181
- // set start after token if needed.
182
- if len (lastToken ) != 0 {
183
- // we have already received the new event and handle it success,
184
- // so we need to use this token. otherwise, we should still use the w.watchOpt.StartAfterToken
185
- opts .WatchOpt .StartAfterToken = & types.EventToken {Data : lastToken }
186
- }
185
+ blog .Errorf ("%s job, the former watch loop: %s failed, start retry again from token: %+v." , opts .Name ,
186
+ lw .streamWatch .DBName , watchOpt .StartAfterToken )
187
187
188
188
var cancelCtx context.Context
189
189
cancelCtx , cancel = context .WithCancel (ctx )
@@ -200,7 +200,8 @@ func (lw *LoopsWatch) watchRetry(cancel context.CancelFunc,
200
200
// start handle loop jobs
201
201
go doHandler (cancelCtx , watcher , retrySignal )
202
202
203
- blog .Warnf ("%s job, retry loop %s from token: %s success." , opts .Name , lw .streamWatch .DBName , lastToken )
203
+ blog .Warnf ("%s job, retry loop %s from token: %+v success." , opts .Name , lw .streamWatch .DBName ,
204
+ watchOpt .StartAfterToken )
204
205
}
205
206
}
206
207
}
@@ -220,17 +221,14 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context,
220
221
}
221
222
222
223
for {
223
-
224
224
reWatch , loop := observer .canLoop ()
225
225
if reWatch {
226
226
// stop the tick to release resource.
227
227
ticker .Stop ()
228
- blog .Warnf ("%s job, master status has changed, try to re-watch again, collection :%s" , opts .Name ,
228
+ blog .Warnf ("%s job, master status has changed, try to re-watch again, db :%s" , opts .Name ,
229
229
lw .streamWatch .DBName )
230
-
231
230
// trigger re-watch action now.
232
231
close (retrySignal )
233
-
234
232
// exit the for loop
235
233
return
236
234
}
@@ -248,14 +246,12 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context,
248
246
case <- ctxWithCancel .Done ():
249
247
// stop the tick to release resource.
250
248
ticker .Stop ()
251
-
252
249
blog .Warnf ("%s job, received cancel loop watch %s signal, exit loop." , opts .Name , lw .streamWatch .DBName )
253
250
// exit the goroutine
254
251
return
255
252
256
253
case one := <- watcher .EventChan :
257
254
batchEvents = append (batchEvents , one )
258
-
259
255
if blog .V (4 ) {
260
256
blog .Infof ("%s job, received %s event, detail: %s, op-time: %s, rid: %s" , opts .Name ,
261
257
lw .streamWatch .DBName , one .String (), one .ClusterTime .String (), one .ID ())
@@ -266,14 +262,12 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context,
266
262
// continue to get more events
267
263
continue
268
264
}
269
-
270
265
case <- ticker .C :
271
266
// handle with batch event.
272
267
if len (batchEvents ) == 0 {
273
268
// ticks, but no events received, loop next round to get events.
274
269
continue
275
270
}
276
-
277
271
case <- opts .StopNotifier :
278
272
ticker .Stop ()
279
273
blog .Warnf ("received stop %s loop watch job notify, stopping now." , opts .Name )
@@ -284,50 +278,62 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context,
284
278
break
285
279
}
286
280
287
- // for safety guarantee
288
- if len (batchEvents ) == 0 {
289
- continue
281
+ if lw .handleBatchEvents (ctxWithCancel , batchEvents , opts , retryObserver , retrySignal ) {
282
+ return
290
283
}
284
+ }
285
+ }
291
286
292
- first := batchEvents [0 ]
287
+ // handleBatchEvents handles a batch of events and reports whether the loop watch needs to be retried.
288
+ func (lw * LoopsWatch ) handleBatchEvents (ctx context.Context , batchEvents []* types.Event , opts * types.LoopBatchOptions ,
289
+ retryObserver * retryHandler , retrySignal chan struct {}) bool {
293
290
294
- blog .Infof ("%s job, received %s batch %d events, first op-time: %s rid: %s." , opts .Name , lw .streamWatch .DBName ,
295
- len (batchEvents ), first .ClusterTime .String (), first .ID ())
291
+ // for safety guarantee
292
+ if len (batchEvents ) == 0 {
293
+ return false
294
+ }
296
295
297
- retry := opts .EventHandler .DoBatch (batchEvents )
298
- if retry {
296
+ first := batchEvents [0 ]
299
297
300
- if retryObserver .canStillRetry () {
301
- blog .Warnf ("%s job, received %s %d events in batch, but do batch failed, retry now, rid: %s" , opts .Name ,
302
- lw .streamWatch .DBName , len (batchEvents ), first .ID ())
303
- // an error occurred, we need to retry it later.
304
- // tell the schedule to re-watch again.
305
- close (retrySignal )
306
- // exist this goroutine.
307
- return
308
- }
298
+ blog .Infof ("%s job, received %s batch %d events, first op-time: %s rid: %s." , opts .Name , lw .streamWatch .DBName ,
299
+ len (batchEvents ), first .ClusterTime .String (), first .ID ())
309
300
310
- blog .Warnf ("%s job, collection %s batch watch retry exceed max count, skip, rid: %s." , opts .Name ,
311
- lw .streamWatch .DBName , first .ID ())
312
- // save the event token now.
301
+ retry := opts .EventHandler .DoBatch (batchEvents )
302
+ if retry {
303
+ if retryObserver .canStillRetry () {
304
+ blog .Warnf ("%s job, received %s %d events in batch, but do batch failed, retry now, rid: %s" , opts .Name ,
305
+ lw .streamWatch .DBName , len (batchEvents ), first .ID ())
306
+ // an error occurred, we need to retry it later.
307
+ // tell the schedule to re-watch again.
308
+ close (retrySignal )
309
+ // exit this goroutine.
310
+ return true
313
311
}
314
312
315
- // reset retry counter so that the previous retry count will not affect the next event
316
- retryObserver .resetRetryCounter ()
313
+ blog .Warnf ("%s job, collection %s batch watch retry exceed max count, skip, rid: %s." , opts .Name ,
314
+ lw .streamWatch .DBName , first .ID ())
315
+ // save the event token now.
316
+ }
317
317
318
- last := batchEvents [len (batchEvents )- 1 ]
319
- // update the last watched token for resume usage.
320
- if err := opts .TokenHandler .SetLastWatchToken (ctxWithCancel , last .Token .Data ); err != nil {
321
- blog .Errorf ("%s job, loop watch %s event, but set last token failed, err: %v, rid: %s, retry later." ,
322
- opts .Name , lw .streamWatch .DBName , err , first .ID ())
318
+ // reset retry counter so that the previous retry count will not affect the next event
319
+ retryObserver .resetRetryCounter ()
323
320
324
- // retry later.
325
- close ( retrySignal )
326
- // exist this goroutine
327
- return
328
- }
321
+ last := batchEvents [ len ( batchEvents ) - 1 ]
322
+ // update the last watched token for resume usage.
323
+ lastToken := & types. TokenInfo {
324
+ Token : last . Token . Data ,
325
+ StartAtTime : & last . ClusterTime ,
329
326
}
330
-
327
+ if err := opts .TokenHandler .SetLastWatchToken (ctx , lastToken ); err != nil {
328
+ blog .Errorf ("%s job, loop watch %s event, but set last token failed, err: %v, rid: %s, retry later." ,
329
+ opts .Name , lw .streamWatch .DBName , err , first .ID ())
330
+
331
+ // retry later.
332
+ close (retrySignal )
333
+ // exit this goroutine
334
+ return true
335
+ }
336
+ return false
331
337
}
332
338
333
339
// tryLoopWithOne tries to handle events one by one.
@@ -348,11 +354,9 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context,
348
354
blog .Warnf ("%s job, received cancel loop watch %s signal, exit loop, exit loop" , opts .Name ,
349
355
lw .streamWatch .DBName )
350
356
return
351
-
352
357
case <- opts .StopNotifier :
353
358
blog .Warnf ("received stop %s loop watch job notify, stopping now." , opts .Name )
354
359
return
355
-
356
360
default :
357
361
}
358
362
@@ -398,7 +402,11 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context,
398
402
retryObserver .resetRetryCounter ()
399
403
400
404
// update the last watched token for resume usage.
401
- if err := opts .TokenHandler .SetLastWatchToken (ctxWithCancel , one .Token .Data ); err != nil {
405
+ lastToken := & types.TokenInfo {
406
+ Token : one .Token .Data ,
407
+ StartAtTime : & one .ClusterTime ,
408
+ }
409
+ if err := opts .TokenHandler .SetLastWatchToken (ctxWithCancel , lastToken ); err != nil {
402
410
blog .Errorf ("%s job, loop watch %s event, but set last watched token failed, err: %v, rid: %s, " +
403
411
"retry later." , lw .streamWatch .DBName , err , one .ID ())
404
412
0 commit comments