
Commit 2132b2a

Merge remote-tracking branch 'origin/flink/connectors-upgrade-to-new-sink-api' into flink/connectors-upgrade-to-new-sink-api
2 parents: 2da3bdc + 5f93c5f

File tree: 29 files changed, +1192 −325 lines
Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+wrapperVersion=3.3.2
+distributionType=only-script
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.6/apache-maven-3.8.6-bin.zip

README.md

Lines changed: 2 additions & 2 deletions

@@ -26,10 +26,10 @@ Prerequisites for building Fluss:
 ```bash
 git clone https://github.com/alibaba/fluss.git
 cd fluss
-mvn clean package -DskipTests
+./mvnw clean package -DskipTests
 ```

-Fluss is now installed in `build-target`.
+Fluss is now installed in `build-target`. The build command uses the Maven Wrapper (`mvnw`), which ensures the correct Maven version is used.

 ## Contributing

fluss-connectors/fluss-connector-flink/src/main/java/com/alibaba/fluss/connector/flink/source/enumerator/FlinkSourceEnumerator.java

Lines changed: 7 additions & 0 deletions

@@ -124,6 +124,8 @@ public class FlinkSourceEnumerator

     private boolean lakeEnabled = false;

+    private volatile boolean closed = false;
+
     public FlinkSourceEnumerator(
             TablePath tablePath,
             Configuration flussConf,
@@ -266,6 +268,10 @@ private Set<PartitionInfo> listPartitions() {

     /** Init the splits for Fluss. */
     private void checkPartitionChanges(Set<PartitionInfo> partitionInfos, Throwable t) {
+        if (closed) {
+            // Skip if the enumerator is closed, to avoid unnecessary error logs.
+            return;
+        }
         if (t != null) {
             LOG.error("Failed to list partitions for {}", tablePath, t);
             return;
@@ -678,6 +684,7 @@ public SourceEnumeratorState snapshotState(long checkpointId) {
     @Override
     public void close() throws IOException {
         try {
+            closed = true;
             if (flussAdmin != null) {
                 flussAdmin.close();
             }
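
The new `closed` flag guards against a shutdown race: `checkPartitionChanges` runs as an asynchronous callback and can fire after `close()` has already released the admin client, producing spurious error logs. A minimal sketch of the pattern, detached from the Fluss classes (all names here, such as `AsyncLister`, are illustrative):

```java
import java.util.Set;
import java.util.concurrent.CompletableFuture;

// Minimal sketch: guard an async callback with a volatile flag so that
// results arriving after close() are silently dropped. Illustrative names.
class AsyncLister implements AutoCloseable {
    private volatile boolean closed = false; // written by close(), read by the callback thread

    void start() {
        CompletableFuture.supplyAsync(this::listPartitions)
                .whenComplete(this::onPartitionsListed);
    }

    private Set<String> listPartitions() {
        return Set.of("p1", "p2"); // stand-in for an admin-client call
    }

    private void onPartitionsListed(Set<String> partitions, Throwable t) {
        if (closed) {
            return; // shutdown already started; avoid logging errors from torn-down clients
        }
        if (t != null) {
            System.err.println("Failed to list partitions: " + t);
            return;
        }
        System.out.println("Discovered partitions: " + partitions);
    }

    @Override
    public void close() {
        closed = true; // set before releasing resources so in-flight callbacks bail out
    }
}
```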

fluss-connectors/fluss-connector-flink/src/main/java/com/alibaba/fluss/connector/flink/source/reader/FlinkSourceSplitReader.java

Lines changed: 62 additions & 9 deletions

@@ -31,10 +31,12 @@
 import com.alibaba.fluss.connector.flink.source.split.LogSplit;
 import com.alibaba.fluss.connector.flink.source.split.SnapshotSplit;
 import com.alibaba.fluss.connector.flink.source.split.SourceSplitBase;
+import com.alibaba.fluss.exception.PartitionNotExistException;
 import com.alibaba.fluss.metadata.TableBucket;
 import com.alibaba.fluss.metadata.TablePath;
 import com.alibaba.fluss.types.RowType;
 import com.alibaba.fluss.utils.CloseableIterator;
+import com.alibaba.fluss.utils.ExceptionUtils;

 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
 import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
@@ -103,6 +105,8 @@ public class FlinkSourceSplitReader implements SplitReader<RecordAndPos, SourceSplitBase>
     private LakeSplitReaderGenerator lakeSplitReaderGenerator;

     private final Set<String> emptyLogSplits;
+    // track split IDs corresponding to removed partitions
+    private final Set<String> removedSplits = new HashSet<>();

     public FlinkSourceSplitReader(
             Configuration flussConf,
@@ -128,6 +132,13 @@ public FlinkSourceSplitReader(

     @Override
     public RecordsWithSplitIds<RecordAndPos> fetch() throws IOException {
+        if (!removedSplits.isEmpty()) {
+            FlinkRecordsWithSplitIds records =
+                    new FlinkRecordsWithSplitIds(
+                            new HashSet<>(removedSplits), flinkSourceReaderMetrics);
+            removedSplits.clear();
+            return records;
+        }
         checkSnapshotSplitOrStartNext();
         if (currentBoundedSplitReader != null) {
             CloseableIterator<RecordAndPos> recordIterator = currentBoundedSplitReader.readBatch();
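
In Flink's split-reader model, a reader retires a split by returning a batch whose finished-split set names it; the `fetch()` change above returns an empty batch carrying only the removed split IDs before doing any normal polling. A condensed sketch of this drain-first pattern, with a simplified `RecordsBatch` standing in for Flink's `RecordsWithSplitIds` (illustrative, not the Fluss API):

```java
import java.util.ArrayDeque;
import java.util.HashSet;
import java.util.Queue;
import java.util.Set;

// Simplified stand-in for Flink's RecordsWithSplitIds: a batch of records plus
// the IDs of splits that are now finished. Illustrative, not the real API.
record RecordsBatch(Queue<String> records, Set<String> finishedSplits) {}

class DrainFirstReader {
    // Split IDs whose partitions were removed; they must be reported as
    // finished so the framework stops tracking them.
    private final Set<String> removedSplits = new HashSet<>();
    private final Queue<String> buffered = new ArrayDeque<>();

    RecordsBatch fetch() {
        // Drain removed splits first: an empty batch that only carries the
        // finished-split IDs, then forget them.
        if (!removedSplits.isEmpty()) {
            Set<String> finished = new HashSet<>(removedSplits);
            removedSplits.clear();
            return new RecordsBatch(new ArrayDeque<>(), finished);
        }
        // Normal path: hand over buffered records; no split finishes.
        return new RecordsBatch(new ArrayDeque<>(buffered), Set.of());
    }

    void markRemoved(String splitId) {
        removedSplits.add(splitId);
    }
}
```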
@@ -242,37 +253,78 @@ private void subscribeLog(SourceSplitBase split, long startingOffset) {
             Long partitionId = tableBucket.getPartitionId();
             int bucket = tableBucket.getBucket();
             if (partitionId != null) {
-                logScanner.subscribe(partitionId, bucket, startingOffset);
+                // Try to subscribe using the partition id.
+                try {
+                    logScanner.subscribe(partitionId, bucket, startingOffset);
+                } catch (Exception e) {
+                    // PartitionNotExistException may still happen when the partition has
+                    // been removed but the Flink source reader failed over before becoming
+                    // aware of it. Traverse the exception chain to check for it.
+                    boolean partitionNotExist =
+                            ExceptionUtils.findThrowable(e, PartitionNotExistException.class)
+                                    .isPresent();
+                    if (partitionNotExist) {
+                        // Mark the split of the non-existent partition for removal.
+                        removedSplits.add(split.splitId());
+                        LOG.warn(
+                                "Partition {} does not exist when subscribing to log for split {}. Skipping subscription.",
+                                partitionId,
+                                split.splitId());
+                        return;
+                    }
+                }
             } else {
+                // If there is no partition id, subscribe by bucket only.
                 logScanner.subscribe(bucket, startingOffset);
             }

             LOG.info(
                     "Subscribe to read log for split {} from offset {}.",
                     split.splitId(),
                     startingOffset);
-
-            // Track the new bucket in metrics
+            // Track the new bucket in metrics and internal state.
             flinkSourceReaderMetrics.registerTableBucket(tableBucket);
             subscribedBuckets.put(tableBucket, split.splitId());
         }
     }

     public Set<TableBucket> removePartitions(Map<Long, String> removedPartitions) {
-        // todo, may consider to close the current snapshot reader if
-        // the current snapshot split is in the partition buckets
-
-        // may remove from pending snapshot splits
+        // Set to collect table buckets that are unsubscribed.
         Set<TableBucket> unsubscribedTableBuckets = new HashSet<>();
+        // First, if the current active bounded split belongs to a removed partition,
+        // finish it so it will not be restored.
+        if (currentBoundedSplit != null) {
+            TableBucket currentBucket = currentBoundedSplit.getTableBucket();
+            if (removedPartitions.containsKey(currentBucket.getPartitionId())) {
+                try {
+                    // Mark the current split as finished.
+                    removedSplits.add(currentBoundedSplit.splitId());
+                    closeCurrentBoundedSplit();
+                    unsubscribedTableBuckets.add(currentBucket);
+                    LOG.info(
+                            "Mark current bounded split {} as finished for removed partition {}.",
+                            currentBucket,
+                            removedPartitions.get(currentBucket.getPartitionId()));
+                } catch (IOException e) {
+                    LOG.warn(
+                            "Failed to close current bounded split for removed partition {}.",
+                            removedPartitions.get(currentBucket.getPartitionId()),
+                            e);
+                }
+            }
+        }
+
+        // Remove pending snapshot splits whose table buckets belong to removed partitions.
         Iterator<SourceSplitBase> snapshotSplitIterator = boundedSplits.iterator();
         while (snapshotSplitIterator.hasNext()) {
             SourceSplitBase split = snapshotSplitIterator.next();
             TableBucket tableBucket = split.getTableBucket();
             if (removedPartitions.containsKey(tableBucket.getPartitionId())) {
+                removedSplits.add(split.splitId());
                 snapshotSplitIterator.remove();
                 unsubscribedTableBuckets.add(tableBucket);
                 LOG.info(
-                        "Cancel to read snapshot split {} for non-existed partition {}.",
+                        "Cancel reading snapshot split {} for removed partition {}.",
                         split.splitId(),
                         removedPartitions.get(tableBucket.getPartitionId()));
             }
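
The catch block cannot just test `e instanceof PartitionNotExistException`, because the scanner may surface the failure wrapped several causes deep, so it walks the exception chain instead. A standalone sketch in the spirit of `ExceptionUtils.findThrowable` (the actual Fluss utility may differ in detail):

```java
import java.util.Optional;

final class Causes {
    private Causes() {}

    /** Walks the cause chain looking for an exception of the given type. */
    static <T extends Throwable> Optional<T> findThrowable(Throwable t, Class<T> searchType) {
        Throwable current = t;
        // Bound the walk so a cyclic cause chain cannot loop forever.
        for (int depth = 0; current != null && depth < 100; depth++) {
            if (searchType.isInstance(current)) {
                return Optional.of(searchType.cast(current));
            }
            current = current.getCause();
        }
        return Optional.empty();
    }
}
```

With such a helper, the subscribe path can treat `Causes.findThrowable(e, PartitionNotExistException.class).isPresent()` as "partition is gone, mark the split removed" rather than failing the whole reader.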
@@ -289,6 +341,7 @@ public Set<TableBucket> removePartitions(Map<Long, String> removedPartitions) {
                 logScanner.unsubscribe(
                         checkNotNull(tableBucket.getPartitionId(), "partition id must be not null"),
                         tableBucket.getBucket());
+                removedSplits.add(tableBucketAndSplit.getValue());
                 subscribeTableBucketIterator.remove();
                 unsubscribedTableBuckets.add(tableBucket);
                 LOG.info(
@@ -438,7 +491,7 @@ private long getStoppingOffset(TableBucket tableBucket) {
         return stoppingOffsets.getOrDefault(tableBucket, Long.MAX_VALUE);
     }

-    public FlinkRecordsWithSplitIds finishCurrentBoundedSplit() throws IOException {
+    private FlinkRecordsWithSplitIds finishCurrentBoundedSplit() throws IOException {
         Set<String> finishedSplits =
                 currentBoundedSplit instanceof HybridSnapshotLogSplit
                         // is hybrid split, not to finish this split
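
Taken together, `removePartitions` applies the same removal in three places: the active bounded split, the queue of pending bounded splits, and the live log subscriptions. A condensed sketch of the subscription case, using an explicit iterator so entries can be removed mid-iteration (simplified types; `Bucket` and `SubscriptionPruner` are illustrative):

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

// Simplified stand-in for a table bucket: a partition id plus a bucket index.
record Bucket(long partitionId, int bucket) {}

class SubscriptionPruner {
    // Mirrors the reader's subscribedBuckets map: bucket -> split ID.
    private final Map<Bucket, String> subscribedBuckets = new HashMap<>();
    private final Set<String> removedSplits = new HashSet<>();

    /** Unsubscribes every bucket of a removed partition and returns those buckets. */
    Set<Bucket> removePartitions(Map<Long, String> removedPartitions) {
        Set<Bucket> unsubscribed = new HashSet<>();
        // Explicit iterator: required to remove map entries while iterating.
        Iterator<Map.Entry<Bucket, String>> it = subscribedBuckets.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<Bucket, String> entry = it.next();
            if (removedPartitions.containsKey(entry.getKey().partitionId())) {
                removedSplits.add(entry.getValue()); // report as finished on the next fetch()
                it.remove();
                unsubscribed.add(entry.getKey());
            }
        }
        return unsubscribed;
    }
}
```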
