Skip to content

Commit 2086b0e

Browse files
authored
[Improve] Improve MultiTableSinkWriter prepare commit performance (#6495)
* [Improve] Improve MultiTableSinkWriter prepare commit performance
* update
* update
1 parent 341615f commit 2086b0e

File tree

3 files changed

+75
-13
lines changed

3 files changed

+75
-13
lines changed

Diff for: .github/workflows/backend.yml

+14
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ jobs:
304304
java-version: ${{ matrix.java }}
305305
distribution: 'temurin'
306306
cache: 'maven'
307+
- name: free disk space
308+
run: tools/github/free_disk_space.sh
307309
- name: run updated modules integration test (part-1)
308310
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
309311
run: |
@@ -333,6 +335,8 @@ jobs:
333335
java-version: ${{ matrix.java }}
334336
distribution: 'temurin'
335337
cache: 'maven'
338+
- name: free disk space
339+
run: tools/github/free_disk_space.sh
336340
- name: run updated modules integration test (part-2)
337341
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
338342
run: |
@@ -393,6 +397,8 @@ jobs:
393397
java-version: ${{ matrix.java }}
394398
distribution: 'temurin'
395399
cache: 'maven'
400+
- name: free disk space
401+
run: tools/github/free_disk_space.sh
396402
- name: run updated modules integration test (part-4)
397403
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
398404
run: |
@@ -421,6 +427,8 @@ jobs:
421427
java-version: ${{ matrix.java }}
422428
distribution: 'temurin'
423429
cache: 'maven'
430+
- name: free disk space
431+
run: tools/github/free_disk_space.sh
424432
- name: run updated modules integration test (part-5)
425433
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
426434
run: |
@@ -449,6 +457,8 @@ jobs:
449457
java-version: ${{ matrix.java }}
450458
distribution: 'temurin'
451459
cache: 'maven'
460+
- name: free disk space
461+
run: tools/github/free_disk_space.sh
452462
- name: run updated modules integration test (part-6)
453463
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
454464
run: |
@@ -477,6 +487,8 @@ jobs:
477487
java-version: ${{ matrix.java }}
478488
distribution: 'temurin'
479489
cache: 'maven'
490+
- name: free disk space
491+
run: tools/github/free_disk_space.sh
480492
- name: run updated modules integration test (part-7)
481493
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
482494
run: |
@@ -506,6 +518,8 @@ jobs:
506518
java-version: ${{ matrix.java }}
507519
distribution: 'temurin'
508520
cache: 'maven'
521+
- name: free disk space
522+
run: tools/github/free_disk_space.sh
509523
- name: run updated modules integration test (part-8)
510524
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
511525
run: |

Diff for: seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java

+56-13
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.seatunnel.api.sink.MultiTableResourceManager;
2121
import org.apache.seatunnel.api.sink.SinkWriter;
2222
import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
23+
import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
2324
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
2425

2526
import lombok.extern.slf4j.Slf4j;
@@ -34,6 +35,7 @@
3435
import java.util.concurrent.BlockingQueue;
3536
import java.util.concurrent.ExecutorService;
3637
import java.util.concurrent.Executors;
38+
import java.util.concurrent.Future;
3739
import java.util.concurrent.LinkedBlockingQueue;
3840
import java.util.concurrent.TimeUnit;
3941
import java.util.concurrent.atomic.AtomicInteger;
@@ -58,7 +60,9 @@ public MultiTableSinkWriter(
5860
AtomicInteger cnt = new AtomicInteger(0);
5961
executorService =
6062
Executors.newFixedThreadPool(
61-
queueSize,
63+
// the pool runs both the `MultiTableWriterRunnable` tasks and the prepare commit
64+
// tasks, so it needs twice as many threads as there are queues.
65+
queueSize * 2,
6266
runnable -> {
6367
Thread thread = new Thread(runnable);
6468
thread.setDaemon(true);
@@ -71,9 +75,9 @@ public MultiTableSinkWriter(
7175
BlockingQueue<SeaTunnelRow> queue = new LinkedBlockingQueue<>(1024);
7276
Map<String, SinkWriter<SeaTunnelRow, ?, ?>> tableIdWriterMap = new HashMap<>();
7377
Map<SinkIdentifier, SinkWriter<SeaTunnelRow, ?, ?>> sinkIdentifierMap = new HashMap<>();
74-
int finalI = i;
78+
int queueIndex = i;
7579
sinkWriters.entrySet().stream()
76-
.filter(entry -> entry.getKey().getIndex() % queueSize == finalI)
80+
.filter(entry -> entry.getKey().getIndex() % queueSize == queueIndex)
7781
.forEach(
7882
entry -> {
7983
tableIdWriterMap.put(
@@ -119,6 +123,24 @@ private void subSinkErrorCheck() {
119123
}
120124
}
121125

126+
/**
 * Forwards a schema change event to every sub sink writer whose table identifier equals the
 * full name of the event's table path.
 *
 * <p>{@code subSinkErrorCheck()} runs first (presumably to surface an earlier sub-sink failure;
 * its body is not visible here - TODO confirm). Every writer map in
 * {@code sinkWritersWithIndex} is then scanned, and each matching writer is invoked while
 * holding the monitor of the runnable stored at the same index.
 *
 * @param event the schema change event to hand to the matching sub sink writers
 * @throws IOException declared by the signature; presumably propagated from a sub writer's
 *     {@code applySchemaChange} - verify against the SinkWriter interface
 */
@Override
127+
public void applySchemaChange(SchemaChangeEvent event) throws IOException {
128+
subSinkErrorCheck();
129+
// Index i addresses both sinkWritersWithIndex and runnable, so the writer map and the
// lock object below belong to the same slot.
for (int i = 0; i < sinkWritersWithIndex.size(); i++) {
130+
for (Map.Entry<SinkIdentifier, SinkWriter<SeaTunnelRow, ?, ?>> sinkWriterEntry :
131+
sinkWritersWithIndex.get(i).entrySet()) {
132+
// Only writers registered for the table named by the event are touched.
if (sinkWriterEntry
133+
.getKey()
134+
.getTableIdentifier()
135+
.equals(event.tablePath().getFullName())) {
136+
// Same monitor that prepareCommit() synchronizes on for this index.
// NOTE(review): presumably this keeps the schema change from interleaving with
// work done by the runnable for this slot - confirm in MultiTableWriterRunnable.
synchronized (runnable.get(i)) {
137+
sinkWriterEntry.getValue().applySchemaChange(event);
138+
}
139+
}
140+
}
141+
}
142+
}
143+
122144
@Override
123145
public void write(SeaTunnelRow element) throws IOException {
124146
if (!submitted) {
@@ -178,17 +200,38 @@ public Optional<MultiTableCommitInfo> prepareCommit() throws IOException {
178200
checkQueueRemain();
179201
subSinkErrorCheck();
180202
MultiTableCommitInfo multiTableCommitInfo = new MultiTableCommitInfo(new HashMap<>());
203+
List<Future<?>> futures = new ArrayList<>();
181204
for (int i = 0; i < sinkWritersWithIndex.size(); i++) {
182-
for (Map.Entry<SinkIdentifier, SinkWriter<SeaTunnelRow, ?, ?>> sinkWriterEntry :
183-
sinkWritersWithIndex.get(i).entrySet()) {
184-
synchronized (runnable.get(i)) {
185-
Optional<?> commit = sinkWriterEntry.getValue().prepareCommit();
186-
commit.ifPresent(
187-
o ->
188-
multiTableCommitInfo
189-
.getCommitInfo()
190-
.put(sinkWriterEntry.getKey(), o));
191-
}
205+
int subWriterIndex = i;
206+
futures.add(
207+
executorService.submit(
208+
() -> {
209+
synchronized (runnable.get(subWriterIndex)) {
210+
for (Map.Entry<SinkIdentifier, SinkWriter<SeaTunnelRow, ?, ?>>
211+
sinkWriterEntry :
212+
sinkWritersWithIndex
213+
.get(subWriterIndex)
214+
.entrySet()) {
215+
Optional<?> commit;
216+
try {
217+
commit = sinkWriterEntry.getValue().prepareCommit();
218+
} catch (IOException e) {
219+
throw new RuntimeException(e);
220+
}
221+
commit.ifPresent(
222+
o ->
223+
multiTableCommitInfo
224+
.getCommitInfo()
225+
.put(sinkWriterEntry.getKey(), o));
226+
}
227+
}
228+
}));
229+
}
230+
for (Future<?> future : futures) {
231+
try {
232+
future.get();
233+
} catch (Exception e) {
234+
throw new RuntimeException(e);
192235
}
193236
}
194237
return Optional.of(multiTableCommitInfo);

Diff for: seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/JdbcOutputFormat.java

+5
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ public synchronized void flush() throws IOException {
125125
ExceptionUtils.getMessage(flushException)));
126126
return;
127127
}
128+
if (batchCount == 0) {
129+
LOG.debug("No data to flush.");
130+
return;
131+
}
132+
128133
final int sleepMs = 1000;
129134
for (int i = 0; i <= jdbcConnectionConfig.getMaxRetries(); i++) {
130135
try {

0 commit comments

Comments
 (0)