@@ -64,20 +64,25 @@ public class TieringSplitReader<WriteResult>
6464
6565 private static final Logger LOG = LoggerFactory .getLogger (TieringSplitReader .class );
6666
67- private static final Duration POLL_TIMEOUT = Duration .ofMillis (10000L );
67+ public static final Duration DEFAULT_POLL_TIMEOUT = Duration .ofMillis (10_000L );
6868
6969 // unknown bucket timestamp for empty split or snapshot split
7070 private static final long UNKNOWN_BUCKET_TIMESTAMP = -1 ;
7171
72- private static final long UNKNOW_BUCKET_OFFSET = -1 ;
72+ // unknown bucket offset for empty split or snapshot split
73+ private static final long UNKNOWN_BUCKET_OFFSET = -1 ;
7374
7475 private final LakeTieringFactory <WriteResult , ?> lakeTieringFactory ;
7576
77+ private final Duration pollTimeout ;
78+
7679 // the id for the pending tables to be tiered
7780 private final Queue <Long > pendingTieringTables ;
7881 // the table_id to the pending splits
7982 private final Map <Long , Set <TieringSplit >> pendingTieringSplits ;
8083
84+ private final Set <Long > reachTieringDeadlineTables ;
85+
8186 private final Map <TableBucket , LakeWriter <WriteResult >> lakeWriters ;
8287 private final Connection connection ;
8388
@@ -99,11 +104,15 @@ public class TieringSplitReader<WriteResult>
99104
100105 private final Set <TieringSplit > currentEmptySplits ;
101106
102- // Flag to indicate if the current table has timed out and should be force completed
103- private boolean currentTableTieringTimedOut ;
104-
105107 public TieringSplitReader (
106108 Connection connection , LakeTieringFactory <WriteResult , ?> lakeTieringFactory ) {
109+ this (connection , lakeTieringFactory , DEFAULT_POLL_TIMEOUT );
110+ }
111+
112+ protected TieringSplitReader (
113+ Connection connection ,
114+ LakeTieringFactory <WriteResult , ?> lakeTieringFactory ,
115+ Duration pollTimeout ) {
107116 this .lakeTieringFactory = lakeTieringFactory ;
108117 // owned by TieringSourceReader
109118 this .connection = connection ;
@@ -115,7 +124,8 @@ public TieringSplitReader(
115124 this .currentTableSplitsByBucket = new HashMap <>();
116125 this .lakeWriters = new HashMap <>();
117126 this .currentPendingSnapshotSplits = new ArrayDeque <>();
118- this .currentTableTieringTimedOut = false ;
127+ this .reachTieringDeadlineTables = new HashSet <>();
128+ this .pollTimeout = pollTimeout ;
119129 }
120130
121131 @ Override
@@ -147,10 +157,10 @@ public RecordsWithSplitIds<TableBucketWriteResult<WriteResult>> fetch() throws I
147157 }
148158 } else {
149159 if (currentLogScanner != null ) {
150- if (currentTableTieringTimedOut ) {
160+ if (reachTieringDeadlineTables . contains ( currentTableId ) ) {
151161 return forceCompleteTieringLogRecords ();
152162 }
153- ScanRecords scanRecords = currentLogScanner .poll (POLL_TIMEOUT );
163+ ScanRecords scanRecords = currentLogScanner .poll (pollTimeout );
154164 // force to complete records
155165 return forLogRecords (scanRecords );
156166 } else {
@@ -170,6 +180,8 @@ public void handleSplitsChanges(SplitsChange<TieringSplit> splitsChange) {
170180 for (TieringSplit split : splitsChange .splits ()) {
171181 LOG .info ("add split {}" , split .splitId ());
172182 if (split .isForceIgnore ()) {
183+ // if the split is forced to ignore,
184+ // mark it as empty
173185 currentEmptySplits .add (split );
174186 continue ;
175187 }
@@ -273,25 +285,40 @@ private void mayCreateLogScanner() {
273285 forceCompleteTieringLogRecords () throws IOException {
274286 Map <TableBucket , TableBucketWriteResult <WriteResult >> writeResults = new HashMap <>();
275287 Map <TableBucket , String > finishedSplitIds = new HashMap <>();
276- for (Map .Entry <TableBucket , LakeWriter <WriteResult >> entry : lakeWriters .entrySet ()) {
288+
289+ // force finish all splits
290+ Iterator <Map .Entry <TableBucket , TieringSplit >> currentTieringSplitsIterator =
291+ currentTableSplitsByBucket .entrySet ().iterator ();
292+ while (currentTieringSplitsIterator .hasNext ()) {
293+ Map .Entry <TableBucket , TieringSplit > entry = currentTieringSplitsIterator .next ();
277294 TableBucket bucket = entry .getKey ();
278- TieringSplit split = currentTableSplitsByBucket . get ( bucket );
295+ TieringSplit split = entry . getValue ( );
279296 if (split != null && split .isTieringLogSplit ()) {
297+ // get the current offset, timestamp that tiered so far
280298 LogOffsetAndTimestamp logOffsetAndTimestamp =
281299 currentTableTieredOffsetAndTimestamp .get (bucket );
300+ long logEndOffset =
301+ logOffsetAndTimestamp == null
302+ ? UNKNOWN_BUCKET_OFFSET
303+ // logEndOffset is equal to offset tiered + 1
304+ : logOffsetAndTimestamp .logOffset + 1 ;
305+ long timestamp =
306+ logOffsetAndTimestamp == null
307+ ? UNKNOWN_BUCKET_TIMESTAMP
308+ : logOffsetAndTimestamp .timestamp ;
282309 TableBucketWriteResult <WriteResult > bucketWriteResult =
283310 completeLakeWriter (
284- bucket ,
285- split .getPartitionName (),
286- logOffsetAndTimestamp .logOffset ,
287- logOffsetAndTimestamp .timestamp );
311+ bucket , split .getPartitionName (), logEndOffset , timestamp );
288312 writeResults .put (bucket , bucketWriteResult );
289313 finishedSplitIds .put (bucket , split .splitId ());
290314 LOG .info (
291315 "Split {} is forced to be finished due to tiering timeout." ,
292316 split .splitId ());
317+ currentTieringSplitsIterator .remove ();
293318 }
294319 }
320+ reachTieringDeadlineTables .remove (this .currentTableId );
321+ mayFinishCurrentTable ();
295322 return new TableBucketWriteResultWithSplitIds (writeResults , finishedSplitIds );
296323 }
297324
@@ -343,7 +370,11 @@ private RecordsWithSplitIds<TableBucketWriteResult<WriteResult>> forLogRecords(
343370 lastRecord .timestamp ()));
344371 // put split of the bucket
345372 finishedSplitIds .put (bucket , currentSplitId );
346- LOG .info ("Split {} has been finished." , currentSplitId );
373+ LOG .info (
374+ "Finish tier bucket {} for table {}, split: {}." ,
375+ bucket ,
376+ currentTablePath ,
377+ currentSplitId );
347378 }
348379 }
349380
@@ -377,8 +408,11 @@ private TableBucketWriteResult<WriteResult> completeLakeWriter(
377408 long maxTimestamp )
378409 throws IOException {
379410 LakeWriter <WriteResult > lakeWriter = lakeWriters .remove (bucket );
380- WriteResult writeResult = lakeWriter .complete ();
381- lakeWriter .close ();
411+ WriteResult writeResult = null ;
412+ if (lakeWriter != null ) {
413+ writeResult = lakeWriter .complete ();
414+ lakeWriter .close ();
415+ }
382416 return toTableBucketWriteResult (
383417 currentTablePath ,
384418 bucket ,
@@ -402,7 +436,7 @@ private TableBucketWriteResultWithSplitIds forEmptySplits(Set<TieringSplit> empt
402436 tableBucket ,
403437 tieringSplit .getPartitionName (),
404438 null ,
405- UNKNOW_BUCKET_OFFSET ,
439+ UNKNOWN_BUCKET_OFFSET ,
406440 UNKNOWN_BUCKET_TIMESTAMP ,
407441 tieringSplit .getNumberOfSplits ()));
408442 }
@@ -412,7 +446,6 @@ private TableBucketWriteResultWithSplitIds forEmptySplits(Set<TieringSplit> empt
412446 private void mayFinishCurrentTable () throws IOException {
413447 // no any pending splits for the table, just finish the table
414448 if (currentTableSplitsByBucket .isEmpty ()) {
415- LOG .info ("Finish tier table {} of table id {}." , currentTablePath , currentTableId );
416449 finishCurrentTable ();
417450 }
418451 }
@@ -427,6 +460,11 @@ private TableBucketWriteResultWithSplitIds finishCurrentSnapshotSplit() throws I
427460 currentSnapshotSplit .getPartitionName (),
428461 logEndOffset ,
429462 UNKNOWN_BUCKET_TIMESTAMP );
463+ LOG .info (
464+ "Finish tier bucket {} for table {}, split: {}." ,
465+ tableBucket ,
466+ currentTablePath ,
467+ splitId );
430468 closeCurrentSnapshotSplit ();
431469 mayFinishCurrentTable ();
432470 return new TableBucketWriteResultWithSplitIds (
@@ -492,15 +530,13 @@ private void finishCurrentTable() throws IOException {
492530 }
493531
494532 /**
495- * Handle timeout event for a table . This will mark the current table as timed out, and it will
496- * be force completed in the next fetch cycle.
533+ * Handle a table reaching its tiering deadline. This will mark the table as timed out, and it
534+ * will be force completed in the next fetch cycle.
497535 */
498- public void handleTableTimeout (long tableId ) {
499- if (currentTableId != null && currentTableId .equals (tableId )) {
500- LOG .debug (
501- "Table {} tiering timeout event received, will try best to force complete after current processing." ,
502- tableId );
503- currentTableTieringTimedOut = true ;
536+ public void handleTableReachTieringDeadline (long tableId ) {
537+ if ((currentTableId != null && currentTableId .equals (tableId )
538+ || pendingTieringSplits .containsKey (tableId ))) {
539+ reachTieringDeadlineTables .add (tableId );
504540 }
505541 }
506542
@@ -624,6 +660,7 @@ public Set<String> finishedSplits() {
624660 }
625661
626662 private static final class LogOffsetAndTimestamp {
663+
627664 private final long logOffset ;
628665 private final long timestamp ;
629666
0 commit comments