Commit 939efdf

[connector] Fix leader not found when Fluss limit scan on partition table.

1 parent 00fff20
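As the FlussTable.java diff below shows, the root cause is ordering: the client resolved the bucket leader before the partition's metadata was guaranteed to be in its local cache, so for a partitioned table `leaderFor` could fail with a leader-not-found error. The fix refreshes metadata for the partition first. A minimal sketch of the reordering, using only calls that appear in the diff below:

    // Sketch of the core fix: for a partitioned bucket, refresh the partition's
    // metadata BEFORE asking for the bucket leader; otherwise the cached
    // metadata may not contain the partition yet and leaderFor() fails.
    if (tableBucket.getPartitionId() != null) {
        limitScanRequest.setPartitionId(tableBucket.getPartitionId());
        metadataUpdater.checkAndUpdateMetadata(tablePath, tableBucket);
    }
    int leader = metadataUpdater.leaderFor(tableBucket);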

3 files changed: +56 −26 lines changed

fluss-client/src/main/java/com/alibaba/fluss/client/table/FlussTable.java

Lines changed: 47 additions & 25 deletions
@@ -55,6 +55,7 @@
 import com.alibaba.fluss.record.MemoryLogRecords;
 import com.alibaba.fluss.record.ValueRecord;
 import com.alibaba.fluss.record.ValueRecordReadContext;
+import com.alibaba.fluss.row.GenericRow;
 import com.alibaba.fluss.row.InternalRow;
 import com.alibaba.fluss.row.ProjectedRow;
 import com.alibaba.fluss.row.decode.RowDecoder;

@@ -211,17 +212,27 @@ private int getBucketId(byte[] keyBytes, InternalRow key) {
     @Override
     public CompletableFuture<List<ScanRecord>> limitScan(
             TableBucket tableBucket, int limit, @Nullable int[] projectedFields) {
-        // because that rocksdb is not suitable to projection, thus do it in client.
-        int leader = metadataUpdater.leaderFor(tableBucket);
+
         LimitScanRequest limitScanRequest =
                 new LimitScanRequest()
                         .setTableId(tableBucket.getTableId())
                         .setBucketId(tableBucket.getBucket())
                         .setLimit(limit);
+
         if (tableBucket.getPartitionId() != null) {
             limitScanRequest.setPartitionId(tableBucket.getPartitionId());
+            metadataUpdater.checkAndUpdateMetadata(tablePath, tableBucket);
         }
+
+        // because that rocksdb is not suitable to projection, thus do it in client.
+        int leader = metadataUpdater.leaderFor(tableBucket);
         TabletServerGateway gateway = metadataUpdater.newTabletServerClientForNode(leader);
+        RowType rowType = tableInfo.getTableDescriptor().getSchema().toRowType();
+        InternalRow.FieldGetter[] fieldGetters =
+                new InternalRow.FieldGetter[rowType.getFieldCount()];
+        for (int i = 0; i < rowType.getFieldCount(); i++) {
+            fieldGetters[i] = InternalRow.createFieldGetter(rowType.getTypeAt(i), i);
+        }

         CompletableFuture<List<ScanRecord>> future = new CompletableFuture<>();
         gateway.limitScan(limitScanRequest)

@@ -233,7 +244,8 @@ public CompletableFuture<List<ScanRecord>> limitScan(
                                         limit,
                                         limitScantResponse,
                                         projectedFields,
-                                        hasPrimaryKey));
+                                        hasPrimaryKey,
+                                        fieldGetters));
                             } else {
                                 throw ApiError.fromErrorMessage(limitScantResponse).exception();
                             }

@@ -250,7 +262,8 @@ private List<ScanRecord> parseLimitScanResponse(
             int limit,
             LimitScanResponse limitScanResponse,
             @Nullable int[] projectedFields,
-            boolean hasPrimaryKey) {
+            boolean hasPrimaryKey,
+            InternalRow.FieldGetter[] fieldGetters) {
         List<ScanRecord> scanRecordList = new ArrayList<>();
         if (!limitScanResponse.hasRecords()) {
             return scanRecordList;

@@ -262,45 +275,54 @@ private List<ScanRecord> parseLimitScanResponse(
             ValueRecordReadContext readContext =
                     new ValueRecordReadContext(kvValueDecoder.getRowDecoder());
             for (ValueRecord record : valueRecords.records(readContext)) {
-                InternalRow originRow = record.getRow();
-                if (projectedFields != null) {
-                    ProjectedRow row = ProjectedRow.from(projectedFields);
-                    row.replaceRow(originRow);
-                    scanRecordList.add(new ScanRecord(row));
-                } else {
-                    scanRecordList.add(new ScanRecord(originRow));
-                }
+                addScanRecord(projectedFields, scanRecordList, record.getRow(), fieldGetters);
             }
         } else {
             LogRecordReadContext readContext =
                     LogRecordReadContext.createReadContext(tableInfo, null);
             LogRecords records = MemoryLogRecords.pointToByteBuffer(recordsBuffer);
             for (LogRecordBatch logRecordBatch : records.batches()) {
-                // A batch of log record maybe little more than limit, thus we need slice the last
-                // limit number.
-                CloseableIterator<LogRecord> logRecordIterator =
-                        logRecordBatch.records(readContext);
-                while (logRecordIterator.hasNext()) {
-                    InternalRow originRow = logRecordIterator.next().getRow();
-                    if (projectedFields != null) {
-                        ProjectedRow row = ProjectedRow.from(projectedFields);
-                        row.replaceRow(originRow);
-                        scanRecordList.add(new ScanRecord(row));
-                    } else {
-                        scanRecordList.add(new ScanRecord(originRow));
+                // A batch of log record maybe little more than limit, thus we need slice the
+                // last limit number.
+                try (CloseableIterator<LogRecord> logRecordIterator =
+                        logRecordBatch.records(readContext)) {
+                    while (logRecordIterator.hasNext()) {
+                        addScanRecord(
+                                projectedFields,
+                                scanRecordList,
+                                logRecordIterator.next().getRow(),
+                                fieldGetters);
                     }
                 }
             }
-
             if (scanRecordList.size() > limit) {
                 scanRecordList =
                         scanRecordList.subList(
                                 scanRecordList.size() - limit, scanRecordList.size());
             }
         }
+
         return scanRecordList;
     }

+    private void addScanRecord(
+            @Nullable int[] projectedFields,
+            List<ScanRecord> scanRecordList,
+            InternalRow originRow,
+            InternalRow.FieldGetter[] fieldGetters) {
+        GenericRow newRow = new GenericRow(fieldGetters.length);
+        for (int i = 0; i < fieldGetters.length; i++) {
+            newRow.setField(i, fieldGetters[i].getFieldOrNull(originRow));
+        }
+        if (projectedFields != null) {
+            ProjectedRow row = ProjectedRow.from(projectedFields);
+            row.replaceRow(newRow);
+            scanRecordList.add(new ScanRecord(row));
+        } else {
+            scanRecordList.add(new ScanRecord(newRow));
+        }
+    }
+
     /**
      * Return the id of the partition the row belongs to. It'll try to update the metadata if the
      * partition doesn't exist. If the partition doesn't exist yet after update metadata, it'll
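Beyond the metadata fix, the patch routes all row handling through the new addScanRecord helper, which deep-copies each InternalRow into a GenericRow via per-field FieldGetters built once per request rather than per row. A plausible reading (not stated in the commit) is that rows handed out by the record iterators may reference reused buffers, so copying makes the returned ScanRecords safe to retain. A self-contained sketch of the pattern; the calls mirror the diff, and the package for RowType is an assumption:

    import com.alibaba.fluss.row.GenericRow;
    import com.alibaba.fluss.row.InternalRow;
    import com.alibaba.fluss.types.RowType; // assumed package for RowType

    // Build the field getters once per schema, then reuse them for every row.
    static InternalRow.FieldGetter[] fieldGettersFor(RowType rowType) {
        InternalRow.FieldGetter[] getters =
                new InternalRow.FieldGetter[rowType.getFieldCount()];
        for (int i = 0; i < rowType.getFieldCount(); i++) {
            getters[i] = InternalRow.createFieldGetter(rowType.getTypeAt(i), i);
        }
        return getters;
    }

    // Materialize a row so it no longer references the source row's backing memory.
    static GenericRow copyRow(InternalRow source, InternalRow.FieldGetter[] getters) {
        GenericRow copy = new GenericRow(getters.length);
        for (int i = 0; i < getters.length; i++) {
            copy.setField(i, getters[i].getFieldOrNull(source));
        }
        return copy;
    }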

fluss-connectors/fluss-connector-flink/src/main/java/com/alibaba/fluss/connector/flink/utils/PushdownUtils.java

Lines changed: 1 addition & 1 deletion
@@ -365,7 +365,7 @@ public static Collection<RowData> limitScan(
                                     .collect(Collectors.toList()));
             }

-            return limit < responseList.size() ? rowDataList.subList(0, (int) limit) : rowDataList;
+            return limit < rowDataList.size() ? rowDataList.subList(0, (int) limit) : rowDataList;
         } catch (Exception e) {
             throw new FlussRuntimeException(e);
         }
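The one-line PushdownUtils change fixes the truncation guard: limit was compared against responseList (one entry per bucket) instead of rowDataList (the flattened rows). With two bucket responses of three rows each and limit = 3, the old guard (3 < 2, false) skipped truncation and returned all six rows. A miniature of the fixed logic, with a hypothetical applyLimit helper that is not part of the commit:

    import java.util.List;

    // Hypothetical helper: truncate against the number of collected rows,
    // not the number of per-bucket responses they were flattened from.
    static <T> List<T> applyLimit(List<T> rowDataList, long limit) {
        return limit < rowDataList.size()
                ? rowDataList.subList(0, (int) limit)
                : rowDataList;
    }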

fluss-connectors/fluss-connector-flink/src/test/java/com/alibaba/fluss/connector/flink/source/FlinkTableSourceBatchITCase.java

Lines changed: 8 additions & 0 deletions
@@ -270,6 +270,14 @@ void testLimitLogTableScan() throws Exception {
                         "+I[5, name5]");
         assertThat(collected).isSubsetOf(expected);
         assertThat(collected).hasSize(3);
+
+        // test partition table.
+        String partitionTable = preparePartitionedLogTable();
+        query = String.format("SELECT id, name FROM %s limit 3", partitionTable);
+        iterRows = tEnv.executeSql(query).collect();
+        collected = assertAndCollectRecords(iterRows, 3);
+        assertThat(collected).isSubsetOf(expected);
+        assertThat(collected).hasSize(3);
     }

     @ParameterizedTest
