Skip to content

Commit 0f84dc7

Browse files
authored
Merge branch 'apache:master' into master
2 parents 2d6d5db + cb1b232 commit 0f84dc7

File tree

362 files changed

+29542
-4127
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

362 files changed

+29542
-4127
lines changed

.github/labeler.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,5 @@ doris-pipeline-connector:
8484
- flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-doris/**/*
8585
starrocks-pipeline-connector:
8686
- flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/**/*
87+
elasticsearch-pipeline-connector:
88+
- flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/**/*

.github/workflows/flink_cdc.yml

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,10 @@ env:
9494
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-vitess-cdc,\
9595
flink-cdc-connect/flink-cdc-source-connectors/flink-sql-connector-vitess-cdc"
9696

97-
MODULES_E2E: "\
98-
flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests,\
97+
MODULES_PIPELINE_E2E: "\
98+
flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests"
99+
100+
MODULES_SOURCE_E2E: "\
99101
flink-cdc-e2e-tests/flink-cdc-source-e2e-tests"
100102

101103
jobs:
@@ -134,7 +136,8 @@ jobs:
134136
"oceanbase",
135137
"db2",
136138
"vitess",
137-
"e2e"
139+
"pipeline_e2e",
140+
"source_e2e"
138141
]
139142
timeout-minutes: 120
140143
env:
@@ -149,6 +152,23 @@ jobs:
149152
steps:
150153
- run: echo "Running CI pipeline for JDK version ${{ matrix.jdk }}"
151154

155+
- name: Clean up disk space
156+
run: |
157+
set -euo pipefail
158+
159+
echo "Disk space before cleanup"
160+
df -h
161+
162+
echo "Cleaning up disk space"
163+
sudo rm -rf /usr/share/dotnet
164+
sudo rm -rf /usr/local/lib/android
165+
sudo rm -rf /opt/ghc
166+
sudo rm -rf /opt/hostedtoolcache/CodeQL
167+
sudo docker image prune --all --force
168+
169+
echo "Disk space after cleanup"
170+
df -h
171+
152172
- name: Check out repository code
153173
uses: actions/checkout@v4
154174
with:
@@ -205,13 +225,17 @@ jobs:
205225
("vitess")
206226
modules=${{ env.MODULES_VITESS }}
207227
;;
208-
("e2e")
209-
compile_modules="${{ env.MODULES_CORE }},${{ env.MODULES_PIPELINE_CONNECTORS }},${{ env.MODULES_MYSQL }},${{ env.MODULES_POSTGRES }},${{ env.MODULES_ORACLE }},${{ env.MODULES_MONGODB }},${{ env.MODULES_SQLSERVER }},${{ env.MODULES_TIDB }},${{ env.MODULES_OCEANBASE }},${{ env.MODULES_DB2 }},${{ env.MODULES_VITESS }},${{ env.MODULES_E2E }}"
210-
modules=${{ env.MODULES_E2E }}
228+
("pipeline_e2e")
229+
compile_modules="${{ env.MODULES_CORE }},${{ env.MODULES_PIPELINE_CONNECTORS }},${{ env.MODULES_MYSQL }},${{ env.MODULES_POSTGRES }},${{ env.MODULES_ORACLE }},${{ env.MODULES_MONGODB }},${{ env.MODULES_SQLSERVER }},${{ env.MODULES_TIDB }},${{ env.MODULES_OCEANBASE }},${{ env.MODULES_DB2 }},${{ env.MODULES_VITESS }},${{ env.MODULES_PIPELINE_E2E }}"
230+
modules=${{ env.MODULES_PIPELINE_E2E }}
231+
;;
232+
("source_e2e")
233+
compile_modules="${{ env.MODULES_CORE }},${{ env.MODULES_PIPELINE_CONNECTORS }},${{ env.MODULES_MYSQL }},${{ env.MODULES_POSTGRES }},${{ env.MODULES_ORACLE }},${{ env.MODULES_MONGODB }},${{ env.MODULES_SQLSERVER }},${{ env.MODULES_TIDB }},${{ env.MODULES_OCEANBASE }},${{ env.MODULES_DB2 }},${{ env.MODULES_VITESS }},${{ env.MODULES_SOURCE_E2E }}"
234+
modules=${{ env.MODULES_SOURCE_E2E }}
211235
;;
212236
esac
213237
214-
if [ ${{ matrix.module }} != "e2e" ]; then
238+
if [ ${{ matrix.module }} != "pipeline_e2e" ] && [ ${{ matrix.module }} != "source_e2e" ]; then
215239
compile_modules=$modules
216240
fi
217241

Dockerfile

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#/*
2+
# * Licensed to the Apache Software Foundation (ASF) under one or more
3+
# * contributor license agreements. See the NOTICE file distributed with
4+
# * this work for additional information regarding copyright ownership.
5+
# * The ASF licenses this file to You under the Apache License, Version 2.0
6+
# * (the "License"); you may not use this file except in compliance with
7+
# * the License. You may obtain a copy of the License at
8+
# *
9+
# * http://www.apache.org/licenses/LICENSE-2.0
10+
# *
11+
# * Unless required by applicable law or agreed to in writing, software
12+
# * distributed under the License is distributed on an "AS IS" BASIS,
13+
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# * See the License for the specific language governing permissions and
15+
# * limitations under the License.
16+
# */
17+
18+
FROM flink
19+
20+
ARG FLINK_CDC_VERSION=3.3-SNAPSHOT
21+
ARG PIPELINE_DEFINITION_FILE
22+
23+
RUN mkdir -p /opt/flink-cdc
24+
RUN mkdir -p /opt/flink/usrlib
25+
ENV FLINK_CDC_HOME /opt/flink-cdc
26+
COPY flink-cdc-dist/target/flink-cdc-${FLINK_CDC_VERSION}-bin.tar.gz /tmp/
27+
RUN tar -xzvf /tmp/flink-cdc-${FLINK_CDC_VERSION}-bin.tar.gz -C /tmp/ && \
28+
mv /tmp/flink-cdc-${FLINK_CDC_VERSION}/* /opt/flink-cdc/ && \
29+
mv /opt/flink-cdc/lib/flink-cdc-dist-${FLINK_CDC_VERSION}.jar /opt/flink-cdc/lib/flink-cdc-dist.jar && \
30+
rm -rf /tmp/flink-cdc-${FLINK_CDC_VERSION} /tmp/flink-cdc-${FLINK_CDC_VERSION}-bin.tar.gz
31+
# copy jars to cdc libs
32+
COPY flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-values/target/flink-cdc-pipeline-connector-values-${FLINK_CDC_VERSION}.jar /opt/flink/usrlib/flink-cdc-pipeline-connector-values-${FLINK_CDC_VERSION}.jar
33+
COPY flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/target/flink-cdc-pipeline-connector-mysql-${FLINK_CDC_VERSION}.jar /opt/flink/usrlib/flink-cdc-pipeline-connector-mysql-${FLINK_CDC_VERSION}.jar
34+
# copy flink cdc pipeline conf file, Here is an example. Users can replace it according to their needs.
35+
COPY $PIPELINE_DEFINITION_FILE $FLINK_CDC_HOME/conf

README.md

Lines changed: 40 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,41 +35,46 @@ full database synchronization, sharding table synchronization, schema evolution
3535
2. [Download](https://github.com/apache/flink-cdc/releases) Flink CDC tar, unzip it and put jars of pipeline connector to Flink `lib` directory.
3636
3. Create a **YAML** file to describe the data source and data sink, the following example synchronizes all tables under MySQL app_db database to Doris :
3737
```yaml
38-
source:
39-
type: mysql
40-
name: MySQL Source
41-
hostname: 127.0.0.1
42-
port: 3306
43-
username: admin
44-
password: pass
45-
tables: adb.\.*
46-
server-id: 5401-5404
47-
48-
sink:
49-
type: doris
50-
name: Doris Sink
51-
fenodes: 127.0.0.1:8030
52-
username: root
53-
password: pass
54-
55-
transform:
56-
- source-table: adb.web_order01
57-
projection: \*, UPPER(product_name) as product_name
58-
filter: id > 10 AND order_id > 100
59-
description: project fields and filter
60-
- source-table: adb.web_order02
61-
projection: \*, UPPER(product_name) as product_name
62-
filter: id > 20 AND order_id > 200
63-
description: project fields and filter
64-
65-
route:
66-
- source-table: adb.web_order\.*
67-
sink-table: adb.ods_web_orders
68-
description: sync sharding tables to one destination table
69-
70-
pipeline:
71-
name: MySQL to Doris Pipeline
72-
parallelism: 4
38+
source:
39+
type: mysql
40+
hostname: localhost
41+
port: 3306
42+
username: root
43+
password: 123456
44+
tables: app_db.\.*
45+
46+
sink:
47+
type: doris
48+
fenodes: 127.0.0.1:8030
49+
username: root
50+
password: ""
51+
52+
transform:
53+
- source-table: adb.web_order01
54+
projection: \*, format('%S', product_name) as product_name
55+
filter: addone(id) > 10 AND order_id > 100
56+
description: project fields and filter
57+
- source-table: adb.web_order02
58+
projection: \*, format('%S', product_name) as product_name
59+
filter: addone(id) > 20 AND order_id > 200
60+
description: project fields and filter
61+
62+
route:
63+
- source-table: app_db.orders
64+
sink-table: ods_db.ods_orders
65+
- source-table: app_db.shipments
66+
sink-table: ods_db.ods_shipments
67+
- source-table: app_db.products
68+
sink-table: ods_db.ods_products
69+
70+
pipeline:
71+
name: Sync MySQL Database to Doris
72+
parallelism: 2
73+
user-defined-function:
74+
- name: addone
75+
classpath: com.example.functions.AddOneFunctionClass
76+
- name: format
77+
classpath: com.example.functions.FormatFunctionClass
7378
```
7479
4. Submit pipeline job using `flink-cdc.sh` script.
7580
```shell

docs/config.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ pygmentsUseClasses = true
3434
# we change the version for the complete docs when forking of a release branch
3535
# etc.
3636
# The full version string as referenced in Maven (e.g. 1.2.1)
37-
Version = "3.2-SNAPSHOT"
37+
Version = "3.3-SNAPSHOT"
3838

3939
# For stable releases, leave the bugfix version out (e.g. 1.2). For snapshot
4040
# release this should be the same as the regular version
41-
VersionTitle = "3.2-SNAPSHOT"
41+
VersionTitle = "3.3-SNAPSHOT"
4242

4343
# The branch for this version of Apache Flink CDC
4444
Branch = "master"
@@ -58,8 +58,8 @@ pygmentsUseClasses = true
5858
]
5959

6060
PreviousDocs = [
61+
["3.2", "https://nightlies.apache.org/flink/flink-cdc-docs-release-3.2"],
6162
["3.1", "https://nightlies.apache.org/flink/flink-cdc-docs-release-3.1"],
62-
["3.0", "https://nightlies.apache.org/flink/flink-cdc-docs-release-3.0"],
6363
]
6464

6565
[markup]

docs/content.zh/docs/connectors/flink-sources/mongodb-cdc.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ MongoDB CDC 连接器允许从 MongoDB 读取快照数据和增量数据。 本
3939
<groupId>org.apache.flink</groupId>
4040
<artifactId>flink-connector-mongodb-cdc</artifactId>
4141
<!-- 请使用已发布的版本依赖,snapshot 版本的依赖需要本地自行编译。 -->
42-
<version>3.2-SNAPSHOT</version>
42+
<version>3.3-SNAPSHOT</version>
4343
</dependency>
4444
```
4545

docs/content.zh/docs/connectors/flink-sources/mysql-cdc.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ MySQL CDC 连接器允许从 MySQL 数据库读取快照数据和增量数据。
4646
<groupId>org.apache.flink</groupId>
4747
<artifactId>flink-connector-mysql-cdc</artifactId>
4848
<!-- 请使用已发布的版本依赖,snapshot 版本的依赖需要本地自行编译。 -->
49-
<version>3.2-SNAPSHOT</version>
49+
<version>3.3-SNAPSHOT</version>
5050
</dependency>
5151
```
5252

docs/content.zh/docs/connectors/flink-sources/tutorials/oceanbase-tutorial.md

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,23 @@ under the License.
4242
version: '2.1'
4343
services:
4444
observer:
45-
image: oceanbase/oceanbase-ce:4.0.0.0
45+
image: 'oceanbase/oceanbase-ce:4.2.1.6-106000012024042515'
4646
container_name: observer
47-
network_mode: "host"
47+
environment:
48+
- 'MODE=mini'
49+
- 'OB_SYS_PASSWORD=123456'
50+
- 'OB_TENANT_PASSWORD=654321'
51+
ports:
52+
- '2881:2881'
53+
- '2882:2882'
4854
oblogproxy:
49-
image: whhe/oblogproxy:1.1.0_4x
55+
image: 'oceanbase/oblogproxy-ce:latest'
5056
container_name: oblogproxy
5157
environment:
5258
- 'OB_SYS_USERNAME=root'
53-
- 'OB_SYS_PASSWORD=pswd'
54-
network_mode: "host"
59+
- 'OB_SYS_PASSWORD=123456'
60+
ports:
61+
- '2983:2983'
5562
elasticsearch:
5663
image: 'elastic/elasticsearch:7.6.0'
5764
container_name: elasticsearch
@@ -85,42 +92,26 @@ services:
8592
docker-compose up -d
8693
```
8794

88-
### 设置密码
89-
90-
OceanBase 中 root 用户默认是没有密码的,但是 oblogproxy 需要配置一个使用非空密码的系统租户用户,因此这里我们需要先为 root@sys 用户设置一个密码。
95+
### 查询 Root Service List
9196

9297
登陆 sys 租户的 root 用户:
9398

9499
```shell
95-
docker-compose exec observer obclient -h127.0.0.1 -P2881 -uroot@sys
96-
```
97-
98-
设置密码,注意这里的密码需要与上一步中 oblogproxy 服务的环境变量 'OB_SYS_PASSWORD' 保持一样。
99-
100-
```mysql
101-
ALTER USER root IDENTIFIED BY 'pswd';
102-
```
103-
104-
OceanBase 从社区版 4.0.0.0 开始只支持对非 sys 租户的增量数据拉取,这里我们使用 test 租户的 root 用户作为示例。
105-
106-
登陆 test 租户的 root 用户:
107-
108-
```shell
109-
docker-compose exec observer obclient -h127.0.0.1 -P2881 -uroot@test
100+
docker-compose exec observer obclient -h127.0.0.1 -P2881 -uroot@sys -p123456
110101
```
111102

112-
设置密码:
103+
执行以下 sql 以查询 root service list,将 VALUE 列的值保存下来。
113104

114105
```mysql
115-
ALTER USER root IDENTIFIED BY 'test';
106+
SHOW PARAMETERS LIKE 'rootservice_list';
116107
```
117108

118109
### 准备数据
119110

120-
使用 'root@test' 用户登陆。
111+
使用测试用的 test 租户的 root 用户登陆。
121112

122113
```shell
123-
docker-compose exec observer obclient -h127.0.0.1 -P2881 -uroot@test -ptest
114+
docker-compose exec observer obclient -h127.0.0.1 -P2881 -uroot@test -p654321
124115
```
125116

126117
```sql
@@ -169,6 +160,8 @@ VALUES (default, '2020-07-30 10:08:22', 'Jark', 50.50, 102, false),
169160

170161
### 在 Flink SQL CLI 中使用 Flink DDL 创建表
171162

163+
注意在 OceanBase 源表的 SQL 中替换 root_service_list 为真实值。
164+
172165
```sql
173166
-- 设置间隔时间为3秒
174167
Flink SQL> SET execution.checkpointing.interval = 3s;
@@ -189,13 +182,13 @@ Flink SQL> CREATE TABLE orders (
189182
'connector' = 'oceanbase-cdc',
190183
'scan.startup.mode' = 'initial',
191184
'username' = 'root@test',
192-
'password' = 'test',
185+
'password' = '654321',
193186
'tenant-name' = 'test',
194187
'database-name' = '^ob$',
195188
'table-name' = '^orders$',
196189
'hostname' = 'localhost',
197190
'port' = '2881',
198-
'rootserver-list' = '127.0.0.1:2882:2881',
191+
'rootserver-list' = '${root_service_list}',
199192
'logproxy.host' = 'localhost',
200193
'logproxy.port' = '2983',
201194
'working-mode' = 'memory'
@@ -211,13 +204,13 @@ Flink SQL> CREATE TABLE products (
211204
'connector' = 'oceanbase-cdc',
212205
'scan.startup.mode' = 'initial',
213206
'username' = 'root@test',
214-
'password' = 'test',
207+
'password' = '654321',
215208
'tenant-name' = 'test',
216209
'database-name' = '^ob$',
217210
'table-name' = '^products$',
218211
'hostname' = 'localhost',
219212
'port' = '2881',
220-
'rootserver-list' = '127.0.0.1:2882:2881',
213+
'rootserver-list' = '${root_service_list}',
221214
'logproxy.host' = 'localhost',
222215
'logproxy.port' = '2983',
223216
'working-mode' = 'memory'

docs/content.zh/docs/connectors/pipeline-connectors/doris.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ pipeline:
162162
<td>String</td>
163163
<td>StreamLoad的参数。
164164
For example: <code> sink.properties.strict_mode: true</code>.
165-
查看更多关于 <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD/"> StreamLoad的Properties 属性</a></td>
165+
查看更多关于 <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD/"> StreamLoad 的属性</a></td>
166166
</td>
167167
</tr>
168168
<tr>
@@ -172,7 +172,7 @@ pipeline:
172172
<td>String</td>
173173
<td>创建表的Properties配置。
174174
For example: <code> table.create.properties.replication_num: 1</code>.
175-
查看更多关于 <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE/"> Doris Table 的 Properties 属性</a></td>
175+
查看更多关于 <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE/"> Doris Table 的属性</a></td>
176176
</td>
177177
</tr>
178178
</tbody>
@@ -186,7 +186,7 @@ pipeline:
186186
<thead>
187187
<tr>
188188
<th class="text-left" style="width:10%;">CDC type</th>
189-
<th class="text-left" style="width:30%;">Doris type<a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-types/Data-Types/BOOLEAN/"></a></th>
189+
<th class="text-left" style="width:30%;">Doris type</th>
190190
<th class="text-left" style="width:60%;">NOTE</th>
191191
</tr>
192192
</thead>

0 commit comments

Comments
 (0)