-
Notifications
You must be signed in to change notification settings - Fork 138
feat: Enable Stats on Resumed Sync #558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: staging
Are you sure you want to change the base?
Changes from all commits
cdb961b
776fade
6398bf9
b79f70e
3b50a5d
335571a
bf1ac3a
7961d2b
3332a36
f3e044a
948b1fe
c9f54db
0315a45
628f7b3
c7952c2
ff82c4a
e7aa390
ebdb1ae
a27fdfd
e7a2dbc
724d045
3c225f4
d89c722
e74771b
bb814a6
521caa2
99699b5
68c35fa
ba93364
87751c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,14 +115,34 @@ var syncCmd = &cobra.Command{ | |
| return err | ||
| } | ||
|
|
||
| // Setup state early to enable pre-loading remaining records before stats logger starts | ||
| connector.SetupState(state) | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After implementing the previous changes, I tested it and... still got "Not Determined" :( So I started debugging. I added log statements everywhere to trace what was happening. That's when I discovered what is probably a timing issue in sync.go: the stats logger was starting BEFORE the remaining records were loaded from state, I guess.
That's why I added pre-load logic in sync.go that runs BEFORE starting the logger. It iterates through all streams (FullLoad, CDC, Incremental), loads their remaining counts from state, and adds them to pool stats. |
||
| // Pre-load remaining record counts from state to pool stats for accurate progress tracking | ||
| for _, stream := range selectedStreamsMetadata.FullLoadStreams { | ||
| if remainingRecords := state.GetRemainingRecordCount(stream.Self()); remainingRecords > 0 { | ||
| pool.AddRecordsToSyncStats(remainingRecords) | ||
| logger.Infof("Pre-loaded remaining records for stream %s: %d", stream.ID(), remainingRecords) | ||
| } | ||
| } | ||
| for _, stream := range selectedStreamsMetadata.CDCStreams { | ||
| if remainingRecords := state.GetRemainingRecordCount(stream.Self()); remainingRecords > 0 { | ||
| pool.AddRecordsToSyncStats(remainingRecords) | ||
| logger.Infof("Pre-loaded remaining records for stream %s: %d", stream.ID(), remainingRecords) | ||
| } | ||
| } | ||
| for _, stream := range selectedStreamsMetadata.IncrementalStreams { | ||
| if remainingRecords := state.GetRemainingRecordCount(stream.Self()); remainingRecords > 0 { | ||
| pool.AddRecordsToSyncStats(remainingRecords) | ||
| logger.Infof("Pre-loaded remaining records for stream %s: %d", stream.ID(), remainingRecords) | ||
| } | ||
| } | ||
|
|
||
| // start monitoring stats | ||
| logger.StatsLogger(cmd.Context(), func() (int64, int64, int64) { | ||
| stats := pool.GetStats() | ||
| return stats.ThreadCount.Load(), stats.TotalRecordsToSync.Load(), stats.ReadCount.Load() | ||
| }) | ||
|
|
||
| // Setup State for Connector | ||
| connector.SetupState(state) | ||
| // Sync Telemetry tracking | ||
| telemetry.TrackSyncStarted(syncID, streams, selectedStreamsMetadata.SelectedStreams, selectedStreamsMetadata.CDCStreams, connector.Type(), destinationConfig, catalog) | ||
| defer func() { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,8 @@ const ( | |
| MixedType StateType = "MIXED" | ||
| // constant key for chunks | ||
| ChunksKey = "chunks" | ||
| // number of remaining records to be synced in resumable sync | ||
| RemainingRecordsCount = "remaining_records_count" | ||
| ) | ||
|
|
||
| type GlobalState struct { | ||
|
|
@@ -254,6 +256,81 @@ func (s *State) RemoveChunk(stream *ConfiguredStream, chunk Chunk) int { | |
| return -1 | ||
| } | ||
|
|
||
| // GetRemainingRecordCount retrieves the remaining record count for a stream from state | ||
| func (s *State) GetRemainingRecordCount(stream *ConfiguredStream) int64 { | ||
| s.RLock() | ||
| defer s.RUnlock() | ||
|
|
||
| index, contains := utils.ArrayContains(s.Streams, func(elem *StreamState) bool { | ||
| return elem.Namespace == stream.Namespace() && elem.Stream == stream.Name() | ||
| }) | ||
| if contains { | ||
| if count, loaded := s.Streams[index].State.Load(RemainingRecordsCount); loaded { | ||
| if countInt64, ok := count.(int64); ok { | ||
| return countInt64 | ||
| } | ||
| if countFloat64, ok := count.(float64); ok { | ||
| return int64(countFloat64) | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one took me a while to figure out. I added more logging and noticed something weird - even though the state file had a remaining_records_count value, the lookup still returned 0. I traced through the code and found the issue in GetRemainingRecordCount:
if count, loaded := s.Streams[index].State.Load(RemainingRecordCountKey); loaded {
if countInt64, ok := count.(int64); ok {
return countInt64 // This was failing!
}
}
return 0
I guess Go's JSON unmarshaling converts ALL numbers to float64 (when decoding into an interface value), so the int64 assertion never matched. |
||
| } | ||
| } | ||
| return 0 | ||
| } | ||
|
|
||
| // SetRemainingRecordCount stores the remaining record count for a stream in state | ||
| func (s *State) SetRemainingRecordCount(stream *ConfiguredStream, count int64) { | ||
| s.Lock() | ||
| defer s.Unlock() | ||
|
|
||
| index, contains := utils.ArrayContains(s.Streams, func(elem *StreamState) bool { | ||
| return elem.Namespace == stream.Namespace() && elem.Stream == stream.Name() | ||
| }) | ||
| if contains { | ||
| s.Streams[index].State.Store(RemainingRecordsCount, count) | ||
| s.Streams[index].HoldsValue.Store(true) | ||
| } else { | ||
| newStream := s.initStreamState(stream) | ||
| newStream.State.Store(RemainingRecordsCount, count) | ||
| newStream.HoldsValue.Store(true) | ||
| s.Streams = append(s.Streams, newStream) | ||
| } | ||
| s.LogState() | ||
| } | ||
|
|
||
| // DecrementRemainingRecordCount decrements the remaining record count for a stream in state | ||
| func (s *State) DecrementRemainingRecordCount(stream *ConfiguredStream, count int64) { | ||
| s.Lock() | ||
| defer s.Unlock() | ||
|
|
||
| index, contains := utils.ArrayContains(s.Streams, func(elem *StreamState) bool { | ||
| return elem.Namespace == stream.Namespace() && elem.Stream == stream.Name() | ||
| }) | ||
| if contains { | ||
| if remaining, loaded := s.Streams[index].State.Load(RemainingRecordsCount); loaded { | ||
| var currentRemaining int64 | ||
|
|
||
| // Handle both int64 and float64 (when loaded from JSON) | ||
| if remainingInt64, ok := remaining.(int64); ok { | ||
| currentRemaining = remainingInt64 | ||
| } else if remainingFloat64, ok := remaining.(float64); ok { | ||
| currentRemaining = int64(remainingFloat64) | ||
| } else { | ||
| // If neither type matches, skip decrement | ||
| s.LogState() | ||
| return | ||
| } | ||
|
|
||
| newRemaining := currentRemaining - count | ||
| if newRemaining < 0 { | ||
| newRemaining = 0 | ||
| } | ||
| s.Streams[index].State.Store(RemainingRecordsCount, newRemaining) | ||
| s.Streams[index].HoldsValue.Store(true) | ||
| } | ||
| } | ||
| s.LogState() | ||
| } | ||
|
|
||
| func (s *State) MarshalJSON() ([]byte, error) { | ||
| type Alias State | ||
| p := Alias(*s) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.