datazip-inc · Itz-Agasta · Oct 2, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 4, 2025
diff --git a/destination/writers.go b/destination/writers.go
@@ -54,6 +54,7 @@ type (
 		batchSize      int64
 		streamArtifact *writerSchema
 		group          *utils.CxGroup
+		committedCount int64
 	}
 )
 
@@ -254,7 +255,9 @@ func (wt *WriterThread) flush(ctx context.Context, buf []types.RawRecord) (err e
 		return fmt.Errorf("failed to write records: %s", err)
 	}
 
-	logger.Infof("Thread[%s]: successfully wrote %d records", wt.threadID, len(buf))
+	// Track successfully committed records
+	wt.committedCount += int64(len(buf))
+	logger.Infof("Thread[%s]: successfully wrote %d records (total committed: %d)", wt.threadID, len(buf), wt.committedCount)
 	return nil
 }
 
@@ -279,3 +282,8 @@ func (wt *WriterThread) Close(ctx context.Context) error {
 		return wt.writer.Close(ctx)
 	}
 }
+
+// GetCommittedCount returns the running total that flush adds to after each successful write; the field is read here without taking a lock
+func (wt *WriterThread) GetCommittedCount() int64 {
+	return wt.committedCount
+}
diff --git a/drivers/abstract/backfill.go b/drivers/abstract/backfill.go
@@ -17,13 +17,24 @@ import (
 func (a *AbstractDriver) Backfill(ctx context.Context, backfilledStreams chan string, pool *destination.WriterPool, stream types.StreamInterface) error {
 	chunksSet := a.state.GetChunks(stream.Self())
 	var err error
+	isResumedSync := false
 	if chunksSet == nil || chunksSet.Len() == 0 {
 		chunksSet, err = a.driver.GetOrSplitChunks(ctx, pool, stream)
 		if err != nil {
 			return fmt.Errorf("failed to get or split chunks: %s", err)
 		}
 		// set state chunks
 		a.state.SetChunks(stream.Self(), chunksSet)
+	} else {
+		// This is a resumed sync - restore stats from state
+		isResumedSync = true
+		totalCount := a.state.GetTotalRecordCount(stream.Self())
+
+		if totalCount > 0 {
+			logger.Infof("Resuming sync for stream %s: total records = %d", stream.ID(), totalCount)
+			// Restore total count to pool stats for progress tracking
+			pool.AddRecordsToSyncStats(totalCount)
+		}
 	}
 	chunks := chunksSet.Array()
 	if len(chunks) == 0 {
@@ -37,7 +48,11 @@ func (a *AbstractDriver) Backfill(ctx context.Context, backfilledStreams chan st
 	sort.Slice(chunks, func(i, j int) bool {
 		return typeutils.Compare(chunks[i].Min, chunks[j].Min) < 0
 	})
-	logger.Infof("Starting backfill for stream[%s] with %d chunks", stream.GetStream().Name, len(chunks))
+	if isResumedSync {
+		logger.Infof("Resuming backfill for stream[%s] with %d remaining chunks", stream.GetStream().Name, len(chunks))
+	} else {
+		logger.Infof("Starting backfill for stream[%s] with %d chunks", stream.GetStream().Name, len(chunks))
+	}
 	// TODO: create writer instance again on retry
 	chunkProcessor := func(ctx context.Context, chunk types.Chunk) (err error) {
 		threadID := fmt.Sprintf("%s_%s", stream.ID(), utils.ULID())
@@ -60,6 +75,15 @@ func (a *AbstractDriver) Backfill(ctx context.Context, backfilledStreams chan st
 			if err == nil {
 				logger.Infof("finished chunk min[%v] and max[%v] of stream %s", chunk.Min, chunk.Max, stream.ID())
 				chunksLeft := a.state.RemoveChunk(stream.Self(), chunk)
+
+				// Update synced record count in state based on committed records only
+				// This represents records that have been successfully written to the destination
+				committedRecords := inserter.GetCommittedCount()
+				previousSyncedCount := a.state.GetSyncedRecordCount(stream.Self())
+				totalSyncedCount := previousSyncedCount + committedRecords
+				a.state.SetSyncedRecordCount(stream.Self(), totalSyncedCount)
+				logger.Infof("Stream %s: chunk completed with %d committed records (total synced: %d)", stream.ID(), committedRecords, totalSyncedCount)
+
 				if chunksLeft == 0 && backfilledStreams != nil {
 					backfilledStreams <- stream.ID()
 				}

diff --git a/drivers/abstract/cdc.go b/drivers/abstract/cdc.go
@@ -78,6 +78,17 @@ func (a *AbstractDriver) RunChangeStream(ctx context.Context, pool *destination.
 							err = fmt.Errorf("post cdc error: %s, cdc insert thread error: %s", postCDCErr, err)
 						}
 
+						// Update synced record count based on committed records
+						if err == nil {
+							committedRecords := inserter.GetCommittedCount()
+							if committedRecords > 0 {
+								previousSyncedCount := a.state.GetSyncedRecordCount(streams[index].Self())
+								totalSyncedCount := previousSyncedCount + committedRecords
+								a.state.SetSyncedRecordCount(streams[index].Self(), totalSyncedCount)
+								logger.Infof("Stream %s cdc: committed %d records (total synced: %d)", streams[index].ID(), committedRecords, totalSyncedCount)
+							}
+						}
+
 						if err != nil {
 							err = fmt.Errorf("thread[%s]: %s", threadID, err)
 						}
@@ -135,6 +146,19 @@ func (a *AbstractDriver) RunChangeStream(ctx context.Context, pool *destination.
 			if postCDCErr != nil {
 				err = fmt.Errorf("post cdc error: %s, cdc insert thread error: %s", postCDCErr, err)
 			}
+
+			// Update synced record counts based on committed records
+			if err == nil {
+				for stream, insert := range inserters {
+					committedRecords := insert.GetCommittedCount()
+					if committedRecords > 0 {
+						previousSyncedCount := a.state.GetSyncedRecordCount(stream.Self())
+						totalSyncedCount := previousSyncedCount + committedRecords
+						a.state.SetSyncedRecordCount(stream.Self(), totalSyncedCount)
+						logger.Infof("Stream %s cdc: committed %d records (total synced: %d)", stream.ID(), committedRecords, totalSyncedCount)
+					}
+				}
+			}
 		}()
 		return RetryOnBackoff(a.driver.MaxRetries(), constants.DefaultRetryTimeout, func() error {
 			return a.driver.StreamChanges(ctx, nil, func(ctx context.Context, change CDCChange) error {

diff --git a/drivers/abstract/incremental.go b/drivers/abstract/incremental.go
@@ -100,6 +100,15 @@ func (a *AbstractDriver) Incremental(ctx context.Context, pool *destination.Writ
 					if err == nil {
 						a.state.SetCursor(stream.Self(), primaryCursor, a.reformatCursorValue(maxPrimaryCursorValue))
 						a.state.SetCursor(stream.Self(), secondaryCursor, a.reformatCursorValue(maxSecondaryCursorValue))
+
+						// Update synced record count based on committed records
+						committedRecords := inserter.GetCommittedCount()
+						if committedRecords > 0 {
+							previousSyncedCount := a.state.GetSyncedRecordCount(stream.Self())
+							totalSyncedCount := previousSyncedCount + committedRecords
+							a.state.SetSyncedRecordCount(stream.Self(), totalSyncedCount)
+							logger.Infof("Stream %s incremental: committed %d records (total synced: %d)", stream.ID(), committedRecords, totalSyncedCount)
+						}
 					} else {
 						err = fmt.Errorf("thread[%s]: %s", threadID, err)
 					}

diff --git a/drivers/mongodb/internal/backfill.go b/drivers/mongodb/internal/backfill.go
@@ -72,6 +72,11 @@ func (m *Mongo) GetOrSplitChunks(ctx context.Context, pool *destination.WriterPo
 
 	logger.Infof("Total expected count for stream %s: %d", stream.ID(), recordCount)
 	pool.AddRecordsToSyncStats(recordCount)
+
+	// Persist total record count to state for resume capability
+	if m.state != nil {
+		m.state.SetTotalRecordCount(stream.Self(), recordCount)
+	}
 
 	// Generate and update chunks
 	var retryErr error

diff --git a/drivers/mysql/internal/backfill.go b/drivers/mysql/internal/backfill.go
@@ -94,6 +94,12 @@ func (m *MySQL) GetOrSplitChunks(ctx context.Context, pool *destination.WriterPo
 	}
 
 	pool.AddRecordsToSyncStats(approxRowCount)
+
+	// Persist total record count to state for resume capability
+	if m.state != nil {
+		m.state.SetTotalRecordCount(stream.Self(), approxRowCount)
+	}
+
 	// avgRowSize is returned as []uint8 which is converted to float64
 	avgRowSizeFloat, err := typeutils.ReformatFloat64(avgRowSize)
 	if err != nil {

diff --git a/drivers/postgres/internal/backfill.go b/drivers/postgres/internal/backfill.go
@@ -70,6 +70,12 @@ func (p *Postgres) GetOrSplitChunks(_ context.Context, pool *destination.WriterP
 		return nil, fmt.Errorf("failed to get approx row count: %s", err)
 	}
 	pool.AddRecordsToSyncStats(approxRowCount)
+
+	// Persist total record count to state for resume capability
+	if p.state != nil {
+		p.state.SetTotalRecordCount(stream.Self(), approxRowCount)
+	}
+
 	return p.splitTableIntoChunks(stream)
 }
 

diff --git a/types/state.go b/types/state.go
@@ -24,6 +24,9 @@ const (
 	MixedType StateType = "MIXED"
 	// constant key for chunks
 	ChunksKey = "chunks"
+	// constant keys for stats
+	TotalRecordCountKey  = "total_record_count"
+	SyncedRecordCountKey = "synced_record_count"
 )
 
 type GlobalState struct {
@@ -254,6 +257,90 @@ func (s *State) RemoveChunk(stream *ConfiguredStream, chunk Chunk) int {
 	return -1
 }
 
+// GetTotalRecordCount returns the value stored under TotalRecordCountKey for the stream, or 0 when none is usable
+func (s *State) GetTotalRecordCount(stream *ConfiguredStream) int64 {
+	s.RLock()
+	defer s.RUnlock()
+
+	pos, found := utils.ArrayContains(s.Streams, func(st *StreamState) bool {
+		return st.Namespace == stream.Namespace() && st.Stream == stream.Name()
+	})
+	if !found {
+		return 0
+	}
+	if raw, loaded := s.Streams[pos].State.Load(TotalRecordCountKey); loaded {
+		switch v := raw.(type) {
+		case int64:
+			return v
+		case float64: // the value may be a float64 after JSON unmarshalling
+			return int64(v)
+		}
+	}
+	return 0
+}
+
+// SetTotalRecordCount stores count under TotalRecordCountKey for the stream,
+// creating the stream's state entry first when it does not exist yet
+func (s *State) SetTotalRecordCount(stream *ConfiguredStream, count int64) {
+	s.Lock()
+	defer s.Unlock()
+
+	pos, found := utils.ArrayContains(s.Streams, func(st *StreamState) bool {
+		return st.Namespace == stream.Namespace() && st.Stream == stream.Name()
+	})
+	if !found {
+		// no entry for this stream yet: append a fresh one,
+		// which then sits at the old length of the slice
+		pos = len(s.Streams)
+		s.Streams = append(s.Streams, s.initStreamState(stream))
+	}
+	s.Streams[pos].State.Store(TotalRecordCountKey, count)
+	s.Streams[pos].HoldsValue.Store(true)
+	s.LogState()
+}
+
+// GetSyncedRecordCount returns the value stored under SyncedRecordCountKey for the stream, or 0 when none is usable
+func (s *State) GetSyncedRecordCount(stream *ConfiguredStream) int64 {
+	s.RLock()
+	defer s.RUnlock()
+
+	pos, found := utils.ArrayContains(s.Streams, func(st *StreamState) bool {
+		return st.Namespace == stream.Namespace() && st.Stream == stream.Name()
+	})
+	if !found {
+		return 0
+	}
+	if raw, loaded := s.Streams[pos].State.Load(SyncedRecordCountKey); loaded {
+		switch v := raw.(type) {
+		case int64:
+			return v
+		case float64: // the value may be a float64 after JSON unmarshalling
+			return int64(v)
+		}
+	}
+	return 0
+}
+
+// SetSyncedRecordCount stores count under SyncedRecordCountKey for the stream,
+// creating the stream's state entry first when it does not exist yet
+// NOTE(review): callers read GetSyncedRecordCount then call this; that pair is not atomic — confirm no concurrent callers
+func (s *State) SetSyncedRecordCount(stream *ConfiguredStream, count int64) {
+	s.Lock()
+	defer s.Unlock()
+
+	pos, found := utils.ArrayContains(s.Streams, func(st *StreamState) bool {
+		return st.Namespace == stream.Namespace() && st.Stream == stream.Name()
+	})
+	if !found {
+		// no entry for this stream yet: append one at the old slice length
+		pos = len(s.Streams)
+		s.Streams = append(s.Streams, s.initStreamState(stream))
+	}
+	s.Streams[pos].State.Store(SyncedRecordCountKey, count)
+	s.Streams[pos].HoldsValue.Store(true)
+	s.LogState()
+}
+
 func (s *State) MarshalJSON() ([]byte, error) {
 	type Alias State
 	p := Alias(*s)

diff --git a/utils/typeutils/reformat.go b/utils/typeutils/reformat.go
@@ -204,9 +204,9 @@ func ReformatDate(v interface{}) (time.Time, error) {
 	}
 
 	// manage year limit
-	// even after data being parsed if year doesn't lie in range [0,9999] it failed to get marshaled
-	if parsed.Year() < 0 {
-		parsed = parsed.AddDate(0-parsed.Year(), 0, 0)
+	// even after data being parsed if year doesn't lie in range [1,9999] it failed to get marshaled
+	if parsed.Year() <= 0 {
+		parsed = parsed.AddDate(1-parsed.Year(), 0, 0)
 	} else if parsed.Year() > 9999 {
 		parsed = parsed.AddDate(-(parsed.Year() - 9999), 0, 0)
 	}