Skip to content

Commit f1da3c7

Browse files
committed
add debug log
Signed-off-by: dongmen <414110582@qq.com>
1 parent 87a1a8b commit f1da3c7

File tree

4 files changed

+326
-12
lines changed

4 files changed

+326
-12
lines changed

downstreamadapter/eventcollector/dispatcher_stat.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,21 @@ func (d *dispatcherStat) doReset(serverID node.ID, resetTs uint64) {
224224
}
225225
// remove the dispatcher from the dynamic stream
226226
resetRequest := d.newDispatcherResetRequest(d.eventCollector.getLocalServerID().String(), resetTs, epoch)
227+
if d.target.EnableSyncPoint() {
228+
req := resetRequest.DispatcherRequest
229+
log.Info("send reset dispatcher syncpoint request",
230+
zap.Stringer("changefeedID", d.target.GetChangefeedID()),
231+
zap.Stringer("dispatcher", d.getDispatcherID()),
232+
zap.Stringer("eventServiceID", serverID),
233+
zap.Uint64("epoch", epoch),
234+
zap.Uint64("resetTs", resetTs),
235+
zap.Uint64("checkpointTs", d.target.GetCheckpointTs()),
236+
zap.Uint64("lastEventCommitTs", d.lastEventCommitTs.Load()),
237+
zap.Uint64("syncPointTs", req.SyncPointTs),
238+
zap.Uint64("syncPointIntervalSeconds", req.SyncPointInterval),
239+
zap.Bool("skipSyncpointAtStartTs", d.target.GetSkipSyncpointAtStartTs()),
240+
)
241+
}
227242
msg := messaging.NewSingleTargetMessage(serverID, messaging.EventServiceTopic, resetRequest)
228243
d.eventCollector.enqueueMessageForSend(msg)
229244
log.Info("send reset dispatcher request to event service",

pkg/eventservice/dispatcher_stat.go

Lines changed: 101 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ func (c *changefeedStatus) tryEnterSyncPointPrepare(candidateTs uint64) bool {
504504
case c.syncPointInFlightTs.Load() != 0:
505505
return false
506506
case candidateTs < preparingTs:
507+
log.Info("syncpoint prepare ts moved backward",
508+
zap.Stringer("changefeedID", c.changefeedID),
509+
zap.Uint64("oldPreparingTs", preparingTs),
510+
zap.Uint64("newPreparingTs", candidateTs),
511+
zap.Uint64("inFlightTs", c.syncPointInFlightTs.Load()))
507512
c.syncPointPreparingTs.Store(candidateTs)
508513
return true
509514
default:
@@ -531,19 +536,56 @@ func (c *changefeedStatus) tryPromoteSyncPointToCommitIfReady() {
531536

532537
hasEligible := false
533538
ready := true
539+
blockingFound := false
540+
var (
541+
blockingDispatcherID common.DispatcherID
542+
blockingSentResolvedTs uint64
543+
blockingCheckpointTs uint64
544+
blockingNextSyncPoint uint64
545+
blockingSeq uint64
546+
blockingEpoch uint64
547+
)
534548
c.dispatchers.Range(func(_ any, value any) bool {
535549
dispatcher := value.(*atomic.Pointer[dispatcherStat]).Load()
536550
if dispatcher == nil || dispatcher.isRemoved.Load() || dispatcher.seq.Load() == 0 {
537551
return true
538552
}
539553
hasEligible = true
540-
if dispatcher.sentResolvedTs.Load() < preparingTs {
554+
sentResolvedTs := dispatcher.sentResolvedTs.Load()
555+
if sentResolvedTs < preparingTs {
541556
ready = false
557+
if !blockingFound {
558+
blockingFound = true
559+
blockingDispatcherID = dispatcher.id
560+
blockingSentResolvedTs = sentResolvedTs
561+
blockingCheckpointTs = dispatcher.checkpointTs.Load()
562+
blockingNextSyncPoint = dispatcher.nextSyncPoint.Load()
563+
blockingSeq = dispatcher.seq.Load()
564+
blockingEpoch = dispatcher.epoch
565+
}
542566
return false
543567
}
544568
return true
545569
})
546-
if !hasEligible || !ready {
570+
if !hasEligible {
571+
return
572+
}
573+
if !ready {
574+
fields := []zap.Field{
575+
zap.Stringer("changefeedID", c.changefeedID),
576+
zap.Uint64("preparingTs", preparingTs),
577+
}
578+
if blockingFound {
579+
fields = append(fields,
580+
zap.Stringer("blockingDispatcherID", blockingDispatcherID),
581+
zap.Uint64("blockingSentResolvedTs", blockingSentResolvedTs),
582+
zap.Uint64("blockingCheckpointTs", blockingCheckpointTs),
583+
zap.Uint64("blockingNextSyncPointTs", blockingNextSyncPoint),
584+
zap.Uint64("blockingSeq", blockingSeq),
585+
zap.Uint64("blockingEpoch", blockingEpoch),
586+
)
587+
}
588+
log.Debug("syncpoint prepare stage blocked by dispatcher", fields...)
547589
return
548590
}
549591

@@ -564,26 +606,77 @@ func (c *changefeedStatus) tryFinishSyncPointCommitIfAllEmitted() {
564606
}
565607

566608
canAdvance := true
609+
blockingFound := false
610+
blockingReason := ""
611+
var (
612+
blockingDispatcherID common.DispatcherID
613+
blockingNextSyncPointTs uint64
614+
blockingCheckpointTs uint64
615+
blockingSentResolvedTs uint64
616+
blockingDispatcherSeq uint64
617+
blockingDispatcherEpoch uint64
618+
)
567619
c.dispatchers.Range(func(_ any, value any) bool {
568620
dispatcher := value.(*atomic.Pointer[dispatcherStat]).Load()
569621
if dispatcher == nil || dispatcher.isRemoved.Load() || dispatcher.seq.Load() == 0 {
570622
return true
571623
}
572-
if dispatcher.nextSyncPoint.Load() <= inFlightTs {
624+
nextSyncPointTs := dispatcher.nextSyncPoint.Load()
625+
checkpointTs := dispatcher.checkpointTs.Load()
626+
if nextSyncPointTs <= inFlightTs {
573627
canAdvance = false
628+
if !blockingFound {
629+
blockingFound = true
630+
blockingReason = "nextSyncPointNotAdvanced"
631+
blockingDispatcherID = dispatcher.id
632+
blockingNextSyncPointTs = nextSyncPointTs
633+
blockingCheckpointTs = checkpointTs
634+
blockingSentResolvedTs = dispatcher.sentResolvedTs.Load()
635+
blockingDispatcherSeq = dispatcher.seq.Load()
636+
blockingDispatcherEpoch = dispatcher.epoch
637+
}
574638
return false
575639
}
576-
if dispatcher.checkpointTs.Load() <= inFlightTs {
640+
if checkpointTs <= inFlightTs {
577641
canAdvance = false
642+
if !blockingFound {
643+
blockingFound = true
644+
blockingReason = "checkpointNotAdvanced"
645+
blockingDispatcherID = dispatcher.id
646+
blockingNextSyncPointTs = nextSyncPointTs
647+
blockingCheckpointTs = checkpointTs
648+
blockingSentResolvedTs = dispatcher.sentResolvedTs.Load()
649+
blockingDispatcherSeq = dispatcher.seq.Load()
650+
blockingDispatcherEpoch = dispatcher.epoch
651+
}
578652
return false
579653
}
580654
return true
581655
})
582656

583-
if canAdvance {
584-
c.syncPointInFlightTs.Store(0)
585-
if c.syncPointPreparingTs.Load() == inFlightTs {
586-
c.syncPointPreparingTs.Store(0)
657+
if !canAdvance {
658+
fields := []zap.Field{
659+
zap.Stringer("changefeedID", c.changefeedID),
660+
zap.Uint64("inFlightTs", inFlightTs),
661+
zap.Uint64("preparingTs", c.syncPointPreparingTs.Load()),
587662
}
663+
if blockingFound {
664+
fields = append(fields,
665+
zap.String("blockingReason", blockingReason),
666+
zap.Stringer("blockingDispatcherID", blockingDispatcherID),
667+
zap.Uint64("blockingNextSyncPointTs", blockingNextSyncPointTs),
668+
zap.Uint64("blockingCheckpointTs", blockingCheckpointTs),
669+
zap.Uint64("blockingSentResolvedTs", blockingSentResolvedTs),
670+
zap.Uint64("blockingSeq", blockingDispatcherSeq),
671+
zap.Uint64("blockingEpoch", blockingDispatcherEpoch),
672+
)
673+
}
674+
log.Debug("syncpoint commit stage blocked by dispatcher", fields...)
675+
return
676+
}
677+
678+
c.syncPointInFlightTs.Store(0)
679+
if c.syncPointPreparingTs.Load() == inFlightTs {
680+
c.syncPointPreparingTs.Store(0)
588681
}
589682
}

pkg/eventservice/event_broker.go

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -497,9 +497,21 @@ func (c *eventBroker) nudgeSyncPointCommitIfNeeded(d *dispatcherStat) bool {
497497
if !c.shouldNudgeSyncPointCommit(d) {
498498
return false
499499
}
500+
commitTs := d.nextSyncPoint.Load()
501+
watermark := d.sentResolvedTs.Load()
502+
preparingTs := d.changefeedStat.getSyncPointPreparingTs()
503+
inFlightTs := d.changefeedStat.syncPointInFlightTs.Load()
504+
log.Debug("nudge syncpoint commit with resolved event",
505+
zap.Stringer("changefeedID", d.changefeedStat.changefeedID),
506+
zap.Stringer("dispatcherID", d.id),
507+
zap.Uint64("watermark", watermark),
508+
zap.Uint64("nextSyncPointTs", commitTs),
509+
zap.Uint64("preparingTs", preparingTs),
510+
zap.Uint64("inFlightTs", inFlightTs),
511+
zap.Bool("inCommitStage", d.changefeedStat.isSyncPointInCommitStage(commitTs)))
500512
// Resend resolved-ts at current watermark to trigger syncpoint emission in commit stage,
501513
// even when there is no fresh upstream event to drive a new scan.
502-
c.sendResolvedTs(d, d.sentResolvedTs.Load())
514+
c.sendResolvedTs(d, watermark)
503515
return true
504516
}
505517

@@ -722,7 +734,30 @@ func (c *eventBroker) getScanTaskDataRange(task scanTask) (bool, common.DataRang
722734

723735
if dataRange.CommitTsEnd <= dataRange.CommitTsStart {
724736
updateMetricEventServiceSkipResolvedTsCount(task.info.GetMode())
725-
if c.nudgeSyncPointCommitIfNeeded(task) {
737+
nextSyncPointTs := uint64(0)
738+
preparingTs := uint64(0)
739+
inFlightTs := uint64(0)
740+
if task.enableSyncPoint {
741+
nextSyncPointTs = task.nextSyncPoint.Load()
742+
preparingTs = task.changefeedStat.getSyncPointPreparingTs()
743+
inFlightTs = task.changefeedStat.syncPointInFlightTs.Load()
744+
}
745+
nudged := c.nudgeSyncPointCommitIfNeeded(task)
746+
log.Debug("scan range empty after capping",
747+
zap.Stringer("changefeedID", task.changefeedStat.changefeedID),
748+
zap.Stringer("dispatcherID", task.id),
749+
zap.Uint64("startTs", dataRange.CommitTsStart),
750+
zap.Uint64("endTs", dataRange.CommitTsEnd),
751+
zap.Uint64("scanMaxTs", scanMaxTs),
752+
zap.Uint64("ddlResolvedTs", ddlState.ResolvedTs),
753+
zap.Uint64("ddlCommitTs", ddlState.MaxEventCommitTs),
754+
zap.Bool("hasPendingDDLEventInCurrentRange", hasPendingDDLEventInCurrentRange),
755+
zap.Uint64("nextSyncPointTs", nextSyncPointTs),
756+
zap.Uint64("preparingTs", preparingTs),
757+
zap.Uint64("inFlightTs", inFlightTs),
758+
zap.Bool("nudgedSyncPointCommit", nudged),
759+
)
760+
if nudged {
726761
return false, common.DataRange{}
727762
}
728763
// Scan range can become empty after applying capping (for example, scan window).
@@ -756,15 +791,41 @@ func (c *eventBroker) capCommitTsEndBySyncPoint(task scanTask, commitTsEnd uint6
756791
if !task.enableSyncPoint {
757792
return commitTsEnd
758793
}
794+
originalCommitTsEnd := commitTsEnd
759795
c.fastForwardSyncPointIfNeeded(task)
796+
797+
cappedByNextSyncPoint := false
760798
nextSyncPoint := task.nextSyncPoint.Load()
761799
if nextSyncPoint > 0 && commitTsEnd > nextSyncPoint {
762800
task.changefeedStat.tryEnterSyncPointPrepare(nextSyncPoint)
763801
commitTsEnd = nextSyncPoint
802+
cappedByNextSyncPoint = true
764803
}
804+
805+
cappedByPreparingTs := false
765806
preparingTs := task.changefeedStat.getSyncPointPreparingTs()
766-
if preparingTs > 0 && !task.changefeedStat.isSyncPointInCommitStage(preparingTs) {
767-
commitTsEnd = min(commitTsEnd, preparingTs)
807+
inCommitStage := false
808+
if preparingTs > 0 {
809+
inCommitStage = task.changefeedStat.isSyncPointInCommitStage(preparingTs)
810+
if !inCommitStage {
811+
newCommitTsEnd := min(commitTsEnd, preparingTs)
812+
cappedByPreparingTs = newCommitTsEnd < commitTsEnd
813+
commitTsEnd = newCommitTsEnd
814+
}
815+
}
816+
817+
if commitTsEnd < originalCommitTsEnd {
818+
log.Debug("scan range commitTsEnd capped by syncpoint",
819+
zap.Stringer("changefeedID", task.changefeedStat.changefeedID),
820+
zap.Stringer("dispatcherID", task.id),
821+
zap.Uint64("oldCommitTsEnd", originalCommitTsEnd),
822+
zap.Uint64("newCommitTsEnd", commitTsEnd),
823+
zap.Bool("cappedByNextSyncPoint", cappedByNextSyncPoint),
824+
zap.Bool("cappedByPreparingTs", cappedByPreparingTs),
825+
zap.Uint64("nextSyncPointTs", nextSyncPoint),
826+
zap.Uint64("preparingTs", preparingTs),
827+
zap.Bool("inCommitStage", inCommitStage),
828+
)
768829
}
769830
return commitTsEnd
770831
}
@@ -1536,6 +1597,24 @@ func (c *eventBroker) resetDispatcher(dispatcherInfo DispatcherInfo) error {
15361597

15371598
newStat := newDispatcherStat(dispatcherInfo, uint64(len(c.taskChan)), uint64(len(c.messageCh)), tableInfo, status)
15381599
newStat.copyStatistics(oldStat)
1600+
if newStat.enableSyncPoint {
1601+
oldNextSyncPoint := oldStat.nextSyncPoint.Load()
1602+
newNextSyncPoint := newStat.nextSyncPoint.Load()
1603+
if newNextSyncPoint > 0 && oldNextSyncPoint > 0 && newNextSyncPoint < oldNextSyncPoint {
1604+
log.Warn("dispatcher syncpoint moved backward after reset",
1605+
zap.Stringer("changefeedID", changefeedID),
1606+
zap.Stringer("dispatcherID", dispatcherID),
1607+
zap.Uint64("oldEpoch", oldStat.epoch),
1608+
zap.Uint64("newEpoch", newStat.epoch),
1609+
zap.Uint64("oldStartTs", oldStat.info.GetStartTs()),
1610+
zap.Uint64("newStartTs", dispatcherInfo.GetStartTs()),
1611+
zap.Uint64("oldNextSyncPointTs", oldNextSyncPoint),
1612+
zap.Uint64("newNextSyncPointTs", newNextSyncPoint),
1613+
zap.Uint64("preparingTs", status.getSyncPointPreparingTs()),
1614+
zap.Uint64("inFlightTs", status.syncPointInFlightTs.Load()),
1615+
)
1616+
}
1617+
}
15391618

15401619
for {
15411620
if statPtr.CompareAndSwap(oldStat, newStat) {

0 commit comments

Comments
 (0)