diff --git a/.github/workflows/flink_cdc_base.yml b/.github/workflows/flink_cdc_base.yml index c4410bf945d..c5157abc924 100644 --- a/.github/workflows/flink_cdc_base.yml +++ b/.github/workflows/flink_cdc_base.yml @@ -44,7 +44,7 @@ on: jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 120 strategy: max-parallel: 20 diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index b87a3b4c2a0..d53a389a664 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -16,26 +16,38 @@ # This workflow will triage pull requests and apply a label based on the # paths that are modified in the pull request. # -# To use this workflow, you will need to set up a .github/label.yml +# To use this workflow, you will need to set up a .github/labeler.yml # file with configuration. For more information, see: # https://github.com/actions/labeler name: Labeler + on: - pull_request: - branches: - - master - - release-* + workflow_run: + workflows: [Labeler-Trigger] + types: [requested] + +permissions: + checks: write + contents: read + pull-requests: write jobs: label: - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write - steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v6 + with: + persist-credentials: false + submodules: recursive + - name: "Get information about the original trigger of the run" + uses: ./.github/actions/get-workflow-origin + id: source-run-info + with: + token: ${{ secrets.GITHUB_TOKEN }} + sourceRunId: ${{ github.event.workflow_run.id }} - uses: actions/labeler@v6 with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file + repo-token: "${{ secrets.GITHUB_TOKEN }}" + pr-number: ${{ steps.source-run-info.outputs.pullRequestNumber }} diff --git a/.github/workflows/label_trigger.yml b/.github/workflows/label_trigger.yml new file mode 100644 index 00000000000..fb6a8822e1f --- /dev/null +++ 
b/.github/workflows/label_trigger.yml @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Labeler-Trigger +on: + pull_request: + branches: + - master + - release-* + +jobs: + label: + runs-on: ubuntu-latest + steps: + - name: "Do nothing. Just triggers corresponding workflow." + run: echo \ No newline at end of file diff --git a/docs/content.zh/docs/connectors/flink-sources/mongodb-cdc.md b/docs/content.zh/docs/connectors/flink-sources/mongodb-cdc.md index f98c7b6db99..123288989e3 100644 --- a/docs/content.zh/docs/connectors/flink-sources/mongodb-cdc.md +++ b/docs/content.zh/docs/connectors/flink-sources/mongodb-cdc.md @@ -489,6 +489,63 @@ MongoDB 的`oplog.rs` 集合没有在状态之前保持更改记录的更新, 顺便说一句,[DBZ-435](https://issues.redhat.com/browse/DBZ-435)提到的Debezium的MongoDB变更流探索,正在制定路线图。
如果完成了,我们可以考虑集成两种源连接器供用户选择。 +### 动态加表 + +**注意:** 该功能从 Flink CDC 3.1.0 版本开始支持。 + +动态加表功能使你可以为正在运行的作业添加新集合进行监控。新添加的集合将首先读取其快照数据,然后自动读取其变更流。 + +想象一下这个场景:一开始,Flink 作业监控集合 `[product, user, address]`,但几天后,我们希望这个作业还可以监控集合 `[order, custom]`,这些集合包含历史数据,我们需要作业仍然可以复用作业的已有状态。动态加表功能可以优雅地解决此问题。 + +以下操作显示了如何启用此功能来解决上述场景。使用现有的 MongoDB CDC Source 作业,如下: + +```java + MongoDBSource mongoSource = MongoDBSource.builder() + .hosts("yourHostname:27017") + .databaseList("db") // 设置捕获的数据库 + .collectionList("db.product", "db.user", "db.address") // 设置捕获的集合 + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) // 启用扫描新添加的表功能 + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` + +如果我们想添加新集合 `[order, custom]` 到现有的 Flink 作业,只需更新作业的 `collectionList()` 将新增集合 `[order, custom]` 加入并从已有的 savepoint 恢复作业。 + +_Step 1_: 使用 savepoint 停止现有的 Flink 作业。 +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: 更新现有 Flink 作业的集合列表选项。 +1. 更新 `collectionList()` 参数。 +2. 
编译更新后的作业,示例如下: +```java + MongoDBSource mongoSource = MongoDBSource.builder() + .hosts("yourHostname:27017") + .databaseList("db") + .collectionList("db.product", "db.user", "db.address", "db.order", "db.custom") // 设置捕获的集合 [product, user, address, order, custom] + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` +_Step 3_: 从 savepoint 还原更新后的 Flink 作业。 +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**注意:** 请参考文档 [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) 了解更多详细信息。 + ### DataStream Source MongoDB CDC 连接器也可以是一个数据流源。 你可以创建 SourceFunction,如下所示: diff --git a/docs/content.zh/docs/connectors/flink-sources/oracle-cdc.md b/docs/content.zh/docs/connectors/flink-sources/oracle-cdc.md index 2e5c1dd6058..5258e6b411b 100644 --- a/docs/content.zh/docs/connectors/flink-sources/oracle-cdc.md +++ b/docs/content.zh/docs/connectors/flink-sources/oracle-cdc.md @@ -558,6 +558,67 @@ _Note: the mechanism of `scan.startup.mode` option relying on Debezium's `snapsh The Oracle CDC source can't work in parallel reading, because there is only one task can receive change events. 
+### 动态加表 + +**注意:** 该功能从 Flink CDC 3.1.0 版本开始支持。 + +动态加表功能使你可以为正在运行的作业添加新表进行监控。新添加的表将首先读取其快照数据,然后自动读取其 redo log。 + +想象一下这个场景:一开始,Flink 作业监控表 `[product, user, address]`,但几天后,我们希望这个作业还可以监控表 `[order, custom]`,这些表包含历史数据,我们需要作业仍然可以复用作业的已有状态。动态加表功能可以优雅地解决此问题。 + +以下操作显示了如何启用此功能来解决上述场景。使用现有的 Oracle CDC Source 作业,如下: + +```java + JdbcIncrementalSource oracleSource = new OracleSourceBuilder() + .hostname("yourHostname") + .port(1521) + .databaseList("ORCLCDB") // 设置捕获的数据库 + .schemaList("INVENTORY") // 设置捕获的 schema + .tableList("INVENTORY.PRODUCT", "INVENTORY.USER", "INVENTORY.ADDRESS") // 设置捕获的表 + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) // 启用扫描新添加的表功能 + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` + +如果我们想添加新表 `[INVENTORY.ORDER, INVENTORY.CUSTOM]` 到现有的 Flink 作业,只需更新作业的 `tableList()` 将新增表 `[INVENTORY.ORDER, INVENTORY.CUSTOM]` 加入并从已有的 savepoint 恢复作业。 + +_Step 1_: 使用 savepoint 停止现有的 Flink 作业。 +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: 更新现有 Flink 作业的表列表选项。 +1. 更新 `tableList()` 参数。 +2. 
编译更新后的作业,示例如下: +```java + JdbcIncrementalSource oracleSource = new OracleSourceBuilder() + .hostname("yourHostname") + .port(1521) + .databaseList("ORCLCDB") + .schemaList("INVENTORY") + .tableList("INVENTORY.PRODUCT", "INVENTORY.USER", "INVENTORY.ADDRESS", "INVENTORY.ORDER", "INVENTORY.CUSTOM") // 设置捕获的表 [PRODUCT, USER, ADDRESS, ORDER, CUSTOM] + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` +_Step 3_: 从 savepoint 还原更新后的 Flink 作业。 +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**注意:** 请参考文档 [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) 了解更多详细信息。 + ### DataStream Source The Oracle CDC connector can also be a DataStream source. There are two modes for the DataStream source: diff --git a/docs/content.zh/docs/connectors/flink-sources/postgres-cdc.md b/docs/content.zh/docs/connectors/flink-sources/postgres-cdc.md index a9a5b560b3d..44f5fca414f 100644 --- a/docs/content.zh/docs/connectors/flink-sources/postgres-cdc.md +++ b/docs/content.zh/docs/connectors/flink-sources/postgres-cdc.md @@ -510,6 +510,71 @@ The config option `scan.startup.mode` specifies the startup mode for PostgreSQL - `committed-offset`: Skip snapshot phase and start reading events from a `confirmed_flush_lsn` offset of replication slot. - `snapshot`: Only the snapshot phase is performed and exits after the snapshot phase reading is completed. 
+### 动态加表 + +**注意:** 该功能从 Flink CDC 3.1.0 版本开始支持。 + +动态加表功能使你可以为正在运行的作业添加新表进行监控。新添加的表将首先读取其快照数据,然后自动读取其 WAL (Write-Ahead Log) 日志 或者 replication slot changes 复制槽。 + +想象一下这个场景:一开始,Flink 作业监控表 `[product, user, address]`,但几天后,我们希望这个作业还可以监控表 `[order, custom]`,这些表包含历史数据,我们需要作业仍然可以复用作业的已有状态。动态加表功能可以优雅地解决此问题。 + +以下操作显示了如何启用此功能来解决上述场景。使用现有的 PostgreSQL CDC Source 作业,如下: + +```java + JdbcIncrementalSource postgresSource = + PostgresSourceBuilder.PostgresIncrementalSource.builder() + .hostname("yourHostname") + .port(5432) + .database("postgres") // 设置捕获的数据库 + .schemaList("inventory") // 设置捕获的 schema + .tableList("inventory.product", "inventory.user", "inventory.address") // 设置捕获的表 + .username("yourUsername") + .password("yourPassword") + .slotName("flink") + .scanNewlyAddedTableEnabled(true) // 启用扫描新添加的表功能 + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` + +如果我们想添加新表 `[inventory.order, inventory.custom]` 到现有的 Flink 作业,只需更新作业的 `tableList()` 将新增表 `[inventory.order, inventory.custom]` 加入并从已有的 savepoint 恢复作业。 + +_Step 1_: 使用 savepoint 停止现有的 Flink 作业。 +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: 更新现有 Flink 作业的表列表选项。 +1. 更新 `tableList()` 参数。 +2. 
编译更新后的作业,示例如下: +```java + JdbcIncrementalSource postgresSource = + PostgresSourceBuilder.PostgresIncrementalSource.builder() + .hostname("yourHostname") + .port(5432) + .database("postgres") + .schemaList("inventory") + .tableList("inventory.product", "inventory.user", "inventory.address", "inventory.order", "inventory.custom") // 设置捕获的表 [product, user, address, order, custom] + .username("yourUsername") + .password("yourPassword") + .slotName("flink") + .scanNewlyAddedTableEnabled(true) + .deserializer(new JsonDebeziumDeserializationSchema()) // 将 SourceRecord 转换为 JSON 字符串 + .build(); + // 你的业务代码 +``` +_Step 3_: 从 savepoint 还原更新后的 Flink 作业。 +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**注意:** 请参考文档 [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) 了解更多详细信息。 + ### DataStream Source The Postgres CDC connector can also be a DataStream source. 
There are two modes for the DataStream source: diff --git a/docs/content.zh/docs/connectors/pipeline-connectors/starrocks.md b/docs/content.zh/docs/connectors/pipeline-connectors/starrocks.md index 33b7d96ae6f..98f79f86ac9 100644 --- a/docs/content.zh/docs/connectors/pipeline-connectors/starrocks.md +++ b/docs/content.zh/docs/connectors/pipeline-connectors/starrocks.md @@ -128,15 +128,15 @@ pipeline: sink.connect.timeout-ms optional 30000 - String + Integer 与 FE 建立 HTTP 连接的超时时间。取值范围:[100, 60000]。 sink.wait-for-continue.timeout-ms optional 30000 - String - 等待 FE HTTP 100-continue 应答的超时时间。取值范围:[3000, 60000]。 + Integer + 等待 FE HTTP 100-continue 应答的超时时间。取值范围:[3000, 600000]。 sink.buffer-flush.max-bytes @@ -174,6 +174,13 @@ pipeline: Boolean at-least-once 下是否使用 transaction stream load。 + + sink.metric.histogram-window-size + optional + 100 + Integer + 直方图指标的窗口大小。 + sink.properties.* optional @@ -297,6 +304,11 @@ pipeline: DATE + + TIME + VARCHAR + StarRocks 不支持 TIME 类型,因此映射为 VARCHAR。TIME(p) 值以字符串形式存储:当 p = 0 时格式为 "HH:mm:ss",当 p > 0 时格式为 "HH:mm:ss.<p 位小数>"(例如 p = 3 时为 "HH:mm:ss.SSS")。 + TIMESTAMP DATETIME diff --git a/docs/content.zh/docs/core-concept/transform.md b/docs/content.zh/docs/core-concept/transform.md index 5e1a00c9f46..a9bae80cd9c 100644 --- a/docs/content.zh/docs/core-concept/transform.md +++ b/docs/content.zh/docs/core-concept/transform.md @@ -412,6 +412,51 @@ pipeline: 注意这里的 `classpath` 必须是全限定名,并且对应的 `jar` 文件必须包含在 Flink `/lib` 文件夹中,或者通过 `flink-cdc.sh --jar` 选项传递。 +### UDF 配置选项 + +你可以通过添加 `options` 块来向 UDF 传递额外的配置选项。这些选项可以在 `open` 方法中通过 `UserDefinedFunctionContext.configuration()` 获取: + +```yaml +pipeline: + user-defined-function: + - name: query_redis + classpath: com.example.flink.cdc.udf.RedisQueryFunction + options: + hostname: localhost + port: "6379" + cache.enabled: "true" +``` + +在你的 UDF 实现中,可以通过定义 `ConfigOption` 实例来访问这些配置选项: + +```java +import org.apache.flink.cdc.common.configuration.ConfigOption; +import 
org.apache.flink.cdc.common.configuration.ConfigOptions; + +public class RedisQueryFunction implements UserDefinedFunction { + private static final ConfigOption HOSTNAME = + ConfigOptions.key("hostname").stringType().noDefaultValue(); + private static final ConfigOption PORT = + ConfigOptions.key("port").intType().defaultValue(6379); + + private String hostname; + private int port; + + @Override + public void open(UserDefinedFunctionContext context) throws Exception { + hostname = context.configuration().get(HOSTNAME); + port = context.configuration().get(PORT); + // 在这里初始化你的连接... + } + + public Object eval(String key) { + // 使用 hostname 和 port 查询 Redis... + } +} +``` + +`options` 字段是可选的。如果未指定,将会传递一个空的配置给 UDF。 + 在正确注册后,UDF 可以在 `projection` 和 `filter` 表达式中使用,就像内置函数一样: ```yaml diff --git a/docs/content/docs/connectors/flink-sources/mongodb-cdc.md b/docs/content/docs/connectors/flink-sources/mongodb-cdc.md index 98353a3b8c9..80ecad42f08 100644 --- a/docs/content/docs/connectors/flink-sources/mongodb-cdc.md +++ b/docs/content/docs/connectors/flink-sources/mongodb-cdc.md @@ -512,6 +512,63 @@ Applications can use change streams to subscribe to all data changes on a single By the way, Debezium's MongoDB change streams exploration mentioned by [DBZ-435](https://issues.redhat.com/browse/DBZ-435) is on roadmap.
If it's done, we can consider integrating two kinds of source connector for users to choose. +### Scan Newly Added Collections + +**Note:** This feature is available since Flink CDC 3.1.0. + +The Scan Newly Added Collections feature enables you to add new collections to monitor for existing running pipeline. The newly added collections will read their snapshot data firstly and then read their change stream automatically. + +Imagine this scenario: At the beginning, a Flink job monitors collections `[product, user, address]`, but after some days we would like the job can also monitor collections `[order, custom]` which contain history data, and we need the job can still reuse existing state of the job. This feature can resolve this case gracefully. + +The following operations show how to enable this feature to resolve above scenario. An existing Flink job which uses MongoDB CDC Source like: + +```java + MongoDBSource mongoSource = MongoDBSource.builder() + .hosts("yourHostname:27017") + .databaseList("db") // set captured database + .collectionList("db.product", "db.user", "db.address") // set captured collections + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) // enable scan the newly added collections feature + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` + +If we would like to add new collections `[order, custom]` to an existing Flink job, we just need to update the `collectionList()` value of the job to include `[order, custom]` and restore the job from previous savepoint. + +_Step 1_: Stop the existing Flink job with savepoint. +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: Update the collection list option for the existing Flink job. +1. 
update `collectionList()` value. +2. build the jar of updated job. +```java + MongoDBSource mongoSource = MongoDBSource.builder() + .hosts("yourHostname:27017") + .databaseList("db") + .collectionList("db.product", "db.user", "db.address", "db.order", "db.custom") // set captured collections [product, user, address, order, custom] + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) // enable scan newly added tables feature + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` +_Step 3_: Restore the updated Flink job from savepoint. +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**Note:** Please refer to the doc [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) for more details. + ### DataStream Source The MongoDB CDC connector can also be a DataStream source. You can create a SourceFunction as the following shows: diff --git a/docs/content/docs/connectors/flink-sources/oracle-cdc.md b/docs/content/docs/connectors/flink-sources/oracle-cdc.md index 260e9635af0..f61eb0f9aa9 100644 --- a/docs/content/docs/connectors/flink-sources/oracle-cdc.md +++ b/docs/content/docs/connectors/flink-sources/oracle-cdc.md @@ -559,6 +559,67 @@ _Note: the mechanism of `scan.startup.mode` option relying on Debezium's `snapsh The Oracle CDC source can't work in parallel reading, because there is only one task can receive change events. +### Scan Newly Added Tables + +**Note:** This feature is available since Flink CDC 3.1.0. + +Scan Newly Added Tables feature enables you to add new tables to monitor for an existing running pipeline. The newly added tables will read their snapshot data first and then read their redo log automatically.
+ +Imagine this scenario: At the beginning, a Flink job monitors tables `[product, user, address]`, but after some days we would like the job to also monitor tables `[order, custom]` which contain historical data, and we need the job to still reuse existing state of the job. This feature can resolve this case gracefully. + +The following operations show how to enable this feature to resolve above scenario. An existing Flink job which uses Oracle CDC Source like: + +```java + JdbcIncrementalSource oracleSource = new OracleSourceBuilder() + .hostname("yourHostname") + .port(1521) + .databaseList("ORCLCDB") // set captured database + .schemaList("INVENTORY") // set captured schema + .tableList("INVENTORY.PRODUCT", "INVENTORY.USER", "INVENTORY.ADDRESS") // set captured tables + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) // enable scan newly added tables feature + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` + +If we would like to add new tables `[INVENTORY.ORDER, INVENTORY.CUSTOM]` to an existing Flink job, we just need to update the `tableList()` value of the job to include `[INVENTORY.ORDER, INVENTORY.CUSTOM]` and restore the job from previous savepoint. + +_Step 1_: Stop the existing Flink job with savepoint. +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: Update the table list option for the existing Flink job. +1. update `tableList()` value. +2. build the jar of updated job. 
+```java + JdbcIncrementalSource oracleSource = new OracleSourceBuilder() + .hostname("yourHostname") + .port(1521) + .databaseList("ORCLCDB") + .schemaList("INVENTORY") + .tableList("INVENTORY.PRODUCT", "INVENTORY.USER", "INVENTORY.ADDRESS", "INVENTORY.ORDER", "INVENTORY.CUSTOM") // set captured tables [PRODUCT, USER, ADDRESS, ORDER, CUSTOM] + .username("yourUsername") + .password("yourPassword") + .scanNewlyAddedTableEnabled(true) + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` +_Step 3_: Restore the updated Flink job from savepoint. +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**Note:** Please refer to the doc [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) for more details. + ### DataStream Source The Oracle CDC connector can also be a DataStream source. There are two modes for the DataStream source: diff --git a/docs/content/docs/connectors/flink-sources/postgres-cdc.md b/docs/content/docs/connectors/flink-sources/postgres-cdc.md index a0477e54c99..d14549b271e 100644 --- a/docs/content/docs/connectors/flink-sources/postgres-cdc.md +++ b/docs/content/docs/connectors/flink-sources/postgres-cdc.md @@ -511,6 +511,71 @@ The config option `scan.startup.mode` specifies the startup mode for PostgreSQL - `committed-offset`: Skip snapshot phase and start reading events from a `confirmed_flush_lsn` offset of replication slot. - `snapshot`: Only the snapshot phase is performed and exits after the snapshot phase reading is completed. +### Scan Newly Added Tables + +**Note:** This feature is available since Flink CDC 3.1.0. + +Scan Newly Added Tables feature enables you to add new tables to monitor for an existing running pipeline.
The newly added tables will read their snapshot data first and then read their WAL (Write-Ahead Log) or replication slot changes automatically. + +Imagine this scenario: At the beginning, a Flink job monitors tables `[product, user, address]`, but after some days we would like the job to also monitor tables `[order, custom]` which contain historical data, and we need the job to still reuse existing state of the job. This feature can resolve this case gracefully. + +The following operations show how to enable this feature to resolve the above scenario. An existing Flink job which uses PostgreSQL CDC Source like: + +```java + JdbcIncrementalSource postgresSource = + PostgresSourceBuilder.PostgresIncrementalSource.builder() + .hostname("yourHostname") + .port(5432) + .database("postgres") // set captured database + .schemaList("inventory") // set captured schema + .tableList("inventory.product", "inventory.user", "inventory.address") // set captured tables + .username("yourUsername") + .password("yourPassword") + .slotName("flink") + .scanNewlyAddedTableEnabled(true) // enable scan newly added tables feature + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` + +If we would like to add new tables `[inventory.order, inventory.custom]` to an existing Flink job, we just need to update the `tableList()` value of the job to include `[inventory.order, inventory.custom]` and restore the job from previous savepoint. + +_Step 1_: Stop the existing Flink job with savepoint. +```shell +$ ./bin/flink stop $Existing_Flink_JOB_ID +``` +```shell +Suspending job "cca7bc1061d61cf15238e92312c2fc20" with a savepoint. +Savepoint completed. Path: file:/tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab +``` +_Step 2_: Update the table list option for the existing Flink job. +1. update `tableList()` value. +2. build the jar of updated job.
+```java + JdbcIncrementalSource postgresSource = + PostgresSourceBuilder.PostgresIncrementalSource.builder() + .hostname("yourHostname") + .port(5432) + .database("postgres") + .schemaList("inventory") + .tableList("inventory.product", "inventory.user", "inventory.address", "inventory.order", "inventory.custom") // set captured tables [product, user, address, order, custom] + .username("yourUsername") + .password("yourPassword") + .slotName("flink") + .scanNewlyAddedTableEnabled(true) + .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String + .build(); + // your business code +``` +_Step 3_: Restore the updated Flink job from savepoint. +```shell +$ ./bin/flink run \ + --detached \ + --from-savepoint /tmp/flink-savepoints/savepoint-cca7bc-bb1e257f0dab \ + ./FlinkCDCExample.jar +``` +**Note:** Please refer to the doc [Restore the job from previous savepoint](https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/cli/#command-line-interface) for more details. + ### DataStream Source The Postgres CDC connector can also be a DataStream source. There are two modes for the DataStream source: diff --git a/docs/content/docs/connectors/pipeline-connectors/starrocks.md b/docs/content/docs/connectors/pipeline-connectors/starrocks.md index 7a979d01977..67fbbf0466a 100644 --- a/docs/content/docs/connectors/pipeline-connectors/starrocks.md +++ b/docs/content/docs/connectors/pipeline-connectors/starrocks.md @@ -128,14 +128,14 @@ pipeline: sink.connect.timeout-ms optional 30000 - String + Integer The timeout for establishing HTTP connection. Valid values: 100 to 60000. sink.wait-for-continue.timeout-ms optional 30000 - String + Integer Timeout in millisecond to wait for 100-continue response from FE http server. Valid values: 3000 to 600000. @@ -177,6 +177,13 @@ pipeline: Boolean Whether to use transaction stream load for at-least-once when it's available. 
+ + sink.metric.histogram-window-size + optional + 100 + Integer + Window size of histogram metrics. + sink.properties.* optional @@ -306,6 +313,11 @@ pipeline: DATE + + TIME + VARCHAR + StarRocks does not support TIME type, so it is mapped to VARCHAR. TIME values are stored as strings in format "HH:mm:ss" when the precision p = 0, or "HH:mm:ss.<p digits>" when p > 0 (for example, p = 3 uses "HH:mm:ss.SSS"). + TIMESTAMP DATETIME diff --git a/docs/content/docs/core-concept/transform.md b/docs/content/docs/core-concept/transform.md index 22fe8aff143..aed16256762 100644 --- a/docs/content/docs/core-concept/transform.md +++ b/docs/content/docs/core-concept/transform.md @@ -417,6 +417,51 @@ pipeline: Notice that given classpath must be fully-qualified, and corresponding `jar` files must be included in Flink `/lib` folder, or be passed with `flink-cdc.sh --jar` option. +### UDF Options + +You can pass extra options to UDFs by adding an `options` block. These options will be available in the `open` method through `UserDefinedFunctionContext.configuration()`: + +```yaml +pipeline: + user-defined-function: + - name: query_redis + classpath: com.example.flink.cdc.udf.RedisQueryFunction + options: + hostname: localhost + port: "6379" + cache.enabled: "true" +``` + +And in your UDF implementation, you can access these options by defining `ConfigOption` instances: + +```java +import org.apache.flink.cdc.common.configuration.ConfigOption; +import org.apache.flink.cdc.common.configuration.ConfigOptions; + +public class RedisQueryFunction implements UserDefinedFunction { + private static final ConfigOption HOSTNAME = + ConfigOptions.key("hostname").stringType().noDefaultValue(); + private static final ConfigOption PORT = + ConfigOptions.key("port").intType().defaultValue(6379); + + private String hostname; + private int port; + + @Override + public void open(UserDefinedFunctionContext context) throws Exception { + hostname = context.configuration().get(HOSTNAME); + port = 
context.configuration().get(PORT); + // Initialize your connection here... + } + + public Object eval(String key) { + // Query Redis using hostname and port... + } +} +``` + +The `options` field is optional. If not specified, an empty configuration will be passed to the UDF. + After being correctly registered, UDFs could be used in both `projection` and `filter` expressions, just like built-in functions: ```yaml diff --git a/flink-cdc-cli/src/main/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParser.java b/flink-cdc-cli/src/main/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParser.java index efb4e79d26b..79886ea08f4 100644 --- a/flink-cdc-cli/src/main/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParser.java +++ b/flink-cdc-cli/src/main/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParser.java @@ -92,6 +92,7 @@ public class YamlPipelineDefinitionParser implements PipelineDefinitionParser { private static final String UDF_KEY = "user-defined-function"; private static final String UDF_FUNCTION_NAME_KEY = "name"; private static final String UDF_CLASSPATH_KEY = "classpath"; + private static final String UDF_OPTIONS_KEY = "options"; // Model related keys private static final String MODEL_NAME_KEY = "model-name"; @@ -295,7 +296,7 @@ private UdfDef toUdfDef(JsonNode udfNode) { "UDF", udfNode, Arrays.asList(UDF_FUNCTION_NAME_KEY, UDF_CLASSPATH_KEY), - Collections.emptyList()); + Collections.singletonList(UDF_OPTIONS_KEY)); String functionName = checkNotNull( @@ -310,7 +311,15 @@ private UdfDef toUdfDef(JsonNode udfNode) { UDF_CLASSPATH_KEY) .asText(); - return new UdfDef(functionName, classpath); + Map options = + Optional.ofNullable(udfNode.get(UDF_OPTIONS_KEY)) + .map( + node -> + mapper.convertValue( + node, new TypeReference>() {})) + .orElse(null); + + return new UdfDef(functionName, classpath, options); } private TransformDef toTransformDef(JsonNode transformNode) { diff --git 
a/flink-cdc-cli/src/test/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParserTest.java b/flink-cdc-cli/src/test/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParserTest.java index bf4c377ee2c..bbfafcf8e69 100644 --- a/flink-cdc-cli/src/test/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParserTest.java +++ b/flink-cdc-cli/src/test/java/org/apache/flink/cdc/cli/parser/YamlPipelineDefinitionParserTest.java @@ -197,6 +197,15 @@ void testUdfDefinition() throws Exception { assertThat(pipelineDef).isEqualTo(pipelineDefWithUdf); } + @Test + void testUdfDefinitionWithOptions() throws Exception { + URL resource = + Resources.getResource("definitions/pipeline-definition-with-udf-options.yaml"); + YamlPipelineDefinitionParser parser = new YamlPipelineDefinitionParser(); + PipelineDef pipelineDef = parser.parse(new Path(resource.toURI()), new Configuration()); + assertThat(pipelineDef).isEqualTo(pipelineDefWithUdfOptions); + } + @Test void testSchemaEvolutionTypesConfiguration() throws Exception { testSchemaEvolutionTypesParsing( @@ -669,4 +678,42 @@ void testParsingFullDefinitionFromString() throws Exception { ImmutableMap.builder() .put("parallelism", "1") .build())); + + private final PipelineDef pipelineDefWithUdfOptions = + new PipelineDef( + new SourceDef("values", null, new Configuration()), + new SinkDef( + "values", + null, + new Configuration(), + ImmutableSet.of( + DROP_COLUMN, + ALTER_COLUMN_TYPE, + ADD_COLUMN, + CREATE_TABLE, + RENAME_COLUMN)), + Collections.emptyList(), + Collections.singletonList( + new TransformDef( + "mydb.web_order", + "*, query_redis(id) as redis_value", + "id > 0", + null, + null, + null, + null, + null)), + Collections.singletonList( + new UdfDef( + "query_redis", + "org.apache.flink.cdc.udf.examples.java.RedisQueryFunction", + ImmutableMap.builder() + .put("hostname", "localhost") + .put("port", "6379") + .put("cache.enabled", "true") + .build())), + Configuration.fromMap( + ImmutableMap.builder() + 
.put("parallelism", "1") + .build())); } diff --git a/flink-cdc-cli/src/test/resources/definitions/pipeline-definition-with-udf-options.yaml b/flink-cdc-cli/src/test/resources/definitions/pipeline-definition-with-udf-options.yaml new file mode 100644 index 00000000000..6a985cbf41c --- /dev/null +++ b/flink-cdc-cli/src/test/resources/definitions/pipeline-definition-with-udf-options.yaml @@ -0,0 +1,36 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +source: + type: values + +sink: + type: values + +transform: + - source-table: mydb.web_order + projection: "*, query_redis(id) as redis_value" + filter: id > 0 + +pipeline: + parallelism: 1 + user-defined-function: + - name: query_redis + classpath: org.apache.flink.cdc.udf.examples.java.RedisQueryFunction + options: + hostname: localhost + port: "6379" + cache.enabled: "true" diff --git a/flink-cdc-composer/pom.xml b/flink-cdc-composer/pom.xml index 0924951d0ab..b5f1d8a187a 100644 --- a/flink-cdc-composer/pom.xml +++ b/flink-cdc-composer/pom.xml @@ -55,6 +55,12 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/definition/UdfDef.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/definition/UdfDef.java index 6dbc580fb38..0486619ec85 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/definition/UdfDef.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/definition/UdfDef.java @@ -17,6 +17,8 @@ package org.apache.flink.cdc.composer.definition; +import java.util.Collections; +import java.util.Map; import java.util.Objects; /** @@ -27,15 +29,22 @@ *
    *
  • name: Static method name of user-defined functions. *
  • classpath: Fully-qualified class path of package containing given function. + *
  • options: Configuration options for the user-defined function. *
*/ public class UdfDef { private final String name; private final String classpath; + private final Map options; public UdfDef(String name, String classpath) { + this(name, classpath, Collections.emptyMap()); + } + + public UdfDef(String name, String classpath, Map options) { this.name = name; this.classpath = classpath; + this.options = options != null ? options : Collections.emptyMap(); } public String getName() { @@ -46,6 +55,10 @@ public String getClasspath() { return classpath; } + public Map getOptions() { + return options; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -56,16 +69,27 @@ public boolean equals(Object o) { } UdfDef udfDef = (UdfDef) o; - return Objects.equals(name, udfDef.name) && Objects.equals(classpath, udfDef.classpath); + return Objects.equals(name, udfDef.name) + && Objects.equals(classpath, udfDef.classpath) + && Objects.equals(options, udfDef.options); } @Override public int hashCode() { - return Objects.hash(name, classpath); + return Objects.hash(name, classpath, options); } @Override public String toString() { - return "UdfDef{" + "name='" + name + '\'' + ", classpath='" + classpath + '\'' + '}'; + return "UdfDef{" + + "name='" + + name + + '\'' + + ", classpath='" + + classpath + + '\'' + + ", options=" + + options + + '}'; } } diff --git a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/TransformTranslator.java b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/TransformTranslator.java index 4cc7a0b2423..a938d7ab05e 100644 --- a/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/TransformTranslator.java +++ b/flink-cdc-composer/src/main/java/org/apache/flink/cdc/composer/flink/translator/TransformTranslator.java @@ -30,7 +30,6 @@ import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo; import org.apache.flink.streaming.api.datastream.DataStream; -import java.util.HashMap; import java.util.List; import java.util.Map; 
import java.util.stream.Collectors; @@ -133,6 +132,6 @@ private Tuple3> modelToUDFTuple(ModelDef mod } private Tuple3> udfDefToUDFTuple(UdfDef udf) { - return Tuple3.of(udf.getName(), udf.getClasspath(), new HashMap<>()); + return Tuple3.of(udf.getName(), udf.getClasspath(), udf.getOptions()); } } diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java index 13cf54e7f13..fd2368b6ce0 100644 --- a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerITCase.java @@ -52,12 +52,13 @@ import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; import org.apache.flink.test.junit5.MiniClusterExtension; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import java.io.ByteArrayOutputStream; import java.io.PrintStream; diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java index f6bb4f4295e..ede3c4b1721 100644 --- a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineComposerLenientITCase.java @@ -46,13 +46,14 @@ import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; import 
org.apache.flink.test.junit5.MiniClusterExtension; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import java.io.ByteArrayOutputStream; import java.io.PrintStream; diff --git a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineUdfITCase.java b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineUdfITCase.java index 5daf370af62..9d5a8b22e23 100644 --- a/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineUdfITCase.java +++ b/flink-cdc-composer/src/test/java/org/apache/flink/cdc/composer/flink/FlinkPipelineUdfITCase.java @@ -547,6 +547,79 @@ void testComplicatedUdf(ValuesDataSink.SinkApi sinkApi, String language) throws .contains("[ LifecycleFunction ] closed. 
Called 6 times."); } + @ParameterizedTest + @MethodSource("testParams") + void testConfigurableUdf(ValuesDataSink.SinkApi sinkApi, String language) throws Exception { + FlinkPipelineComposer composer = FlinkPipelineComposer.ofMiniCluster(); + + // Setup value source + Configuration sourceConfig = new Configuration(); + sourceConfig.set( + ValuesDataSourceOptions.EVENT_SET_ID, + ValuesDataSourceHelper.EventSetId.TRANSFORM_TABLE); + SourceDef sourceDef = + new SourceDef(ValuesDataFactory.IDENTIFIER, "Value Source", sourceConfig); + + // Setup value sink + Configuration sinkConfig = new Configuration(); + sinkConfig.set(ValuesDataSinkOptions.MATERIALIZED_IN_MEMORY, true); + sinkConfig.set(ValuesDataSinkOptions.SINK_API, sinkApi); + SinkDef sinkDef = new SinkDef(ValuesDataFactory.IDENTIFIER, "Value Sink", sinkConfig); + + // Setup transform + TransformDef transformDef = + new TransformDef( + "default_namespace.default_schema.table1", + "*, greet(col1) as greeting", + null, + "col1", + null, + "key1=value1", + "", + null); + + // Setup UDF with options + UdfDef udfDef = + new UdfDef( + "greet", + String.format( + "org.apache.flink.cdc.udf.examples.%s.ConfigurableFunctionClass", + language), + ImmutableMap.of("greeting", "Hi", "suffix", "~")); + + // Setup pipeline + Configuration pipelineConfig = new Configuration(); + pipelineConfig.set(PipelineOptions.PIPELINE_PARALLELISM, 1); + pipelineConfig.set( + PipelineOptions.PIPELINE_SCHEMA_CHANGE_BEHAVIOR, SchemaChangeBehavior.EVOLVE); + PipelineDef pipelineDef = + new PipelineDef( + sourceDef, + sinkDef, + Collections.emptyList(), + Collections.singletonList(transformDef), + Collections.singletonList(udfDef), + pipelineConfig); + + // Execute the pipeline + PipelineExecution execution = composer.compose(pipelineDef); + execution.execute(); + + // Check the order and content of all received events + String[] outputEvents = outCaptor.toString().trim().split("\n"); + assertThat(outputEvents) + .containsExactly( + 
"CreateTableEvent{tableId=default_namespace.default_schema.table1, schema=columns={`col1` STRING NOT NULL,`col2` STRING,`greeting` STRING}, primaryKeys=col1, options=({key1=value1})}", + "DataChangeEvent{tableId=default_namespace.default_schema.table1, before=[], after=[1, 1, Hi 1~], op=INSERT, meta=({op_ts=1})}", + "DataChangeEvent{tableId=default_namespace.default_schema.table1, before=[], after=[2, 2, Hi 2~], op=INSERT, meta=({op_ts=2})}", + "DataChangeEvent{tableId=default_namespace.default_schema.table1, before=[], after=[3, 3, Hi 3~], op=INSERT, meta=({op_ts=3})}", + "AddColumnEvent{tableId=default_namespace.default_schema.table1, addedColumns=[ColumnWithPosition{column=`col3` STRING, position=AFTER, existedColumnName=col2}]}", + "RenameColumnEvent{tableId=default_namespace.default_schema.table1, nameMapping={col2=newCol2, col3=newCol3}}", + "DropColumnEvent{tableId=default_namespace.default_schema.table1, droppedColumnNames=[newCol2]}", + "DataChangeEvent{tableId=default_namespace.default_schema.table1, before=[1, 1, Hi 1~], after=[], op=DELETE, meta=({op_ts=4})}", + "DataChangeEvent{tableId=default_namespace.default_schema.table1, before=[2, , Hi 2~], after=[2, x, Hi 2~], op=UPDATE, meta=({op_ts=5})}"); + } + // -------------------------- // Flink-compatible UDF tests // -------------------------- diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-doris/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-doris/pom.xml index 2507dc319e5..079a0140394 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-doris/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-doris/pom.xml @@ -77,12 +77,24 @@ limitations under the License. flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + +
org.apache.flink flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.slf4j diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml index 516eb1cb70e..de00614494d 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-elasticsearch/pom.xml @@ -58,12 +58,24 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/pom.xml index 0ebfbc9d231..816d99fd584 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/pom.xml @@ -33,12 +33,12 @@ limitations under the License. - 0.7.0 + 0.9.0-incubating - com.alibaba.fluss + org.apache.fluss fluss-client ${fluss.version} @@ -57,37 +57,43 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + - com.alibaba.fluss + org.apache.fluss fluss-server ${fluss.version} test - com.alibaba.fluss + org.apache.fluss fluss-server ${fluss.version} test-jar test - com.alibaba.fluss + org.apache.fluss fluss-test-utils ${fluss.version} test - com.alibaba.fluss + org.apache.fluss fluss-flink-common ${fluss.version} test-jar test - com.alibaba.fluss + org.apache.fluss fluss-flink-1.20 ${fluss.version} test @@ -123,7 +129,7 @@ limitations under the License. 
false - com.alibaba.fluss:* + org.apache.fluss:* diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/factory/FlussDataSinkFactory.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/factory/FlussDataSinkFactory.java index 754eb693a39..ca3a6a98ef2 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/factory/FlussDataSinkFactory.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/factory/FlussDataSinkFactory.java @@ -23,8 +23,8 @@ import org.apache.flink.cdc.common.sink.DataSink; import org.apache.flink.cdc.connectors.fluss.sink.FlussDataSink; -import com.alibaba.fluss.config.ConfigOptions; -import com.alibaba.fluss.config.Configuration; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.Configuration; import java.util.HashMap; import java.util.HashSet; @@ -32,13 +32,13 @@ import java.util.Map; import java.util.Set; -import static org.apache.flink.cdc.connectors.fluss.sink.FlussConfigUtils.parseBucketKeys; -import static org.apache.flink.cdc.connectors.fluss.sink.FlussConfigUtils.parseBucketNumber; import static org.apache.flink.cdc.connectors.fluss.sink.FlussDataSinkOptions.BOOTSTRAP_SERVERS; import static org.apache.flink.cdc.connectors.fluss.sink.FlussDataSinkOptions.BUCKET_KEY; import static org.apache.flink.cdc.connectors.fluss.sink.FlussDataSinkOptions.BUCKET_NUMBER; import static org.apache.flink.cdc.connectors.fluss.sink.FlussDataSinkOptions.CLIENT_PROPERTIES_PREFIX; import static org.apache.flink.cdc.connectors.fluss.sink.FlussDataSinkOptions.TABLE_PROPERTIES_PREFIX; +import static org.apache.flink.cdc.connectors.fluss.utils.FlussConfigUtils.parseBucketKeys; 
+import static org.apache.flink.cdc.connectors.fluss.utils.FlussConfigUtils.parseBucketNumber; /** Factory for creating configured instances of {@link FlussDataSink}. */ public class FlussDataSinkFactory implements DataSinkFactory { diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussDataSink.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussDataSink.java index e03caf8e6d7..4491e503211 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussDataSink.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussDataSink.java @@ -25,7 +25,7 @@ import org.apache.flink.cdc.common.sink.MetadataApplier; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussSink; -import com.alibaba.fluss.config.Configuration; +import org.apache.fluss.config.Configuration; import java.util.List; import java.util.Map; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchema.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchema.java index e92557b111e..086da58946d 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchema.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchema.java @@ -24,14 +24,15 @@ 
import org.apache.flink.cdc.common.event.SchemaChangeEvent; import org.apache.flink.cdc.common.event.TableId; import org.apache.flink.cdc.common.utils.Preconditions; +import org.apache.flink.cdc.connectors.fluss.sink.row.CdcAsFlussRow; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussEvent; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussEventSerializer; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussRowWithOp; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.table.Table; -import com.alibaba.fluss.metadata.TablePath; -import com.alibaba.fluss.types.DataType; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.table.Table; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.types.DataType; import java.io.IOException; import java.util.Collections; @@ -129,12 +130,12 @@ private TablePath getTablePath(TableId tableId) { private static class TableSchemaInfo { org.apache.flink.cdc.common.schema.Schema upstreamCdcSchema; - com.alibaba.fluss.metadata.Schema downStreamFlusstreamSchema; + org.apache.fluss.metadata.Schema downStreamFlusstreamSchema; Map indexMapping; private TableSchemaInfo( org.apache.flink.cdc.common.schema.Schema upstreamCdcSchema, - com.alibaba.fluss.metadata.Schema downStreamFlusstreamSchema) { + org.apache.fluss.metadata.Schema downStreamFlusstreamSchema) { this.upstreamCdcSchema = upstreamCdcSchema; this.downStreamFlusstreamSchema = downStreamFlusstreamSchema; this.indexMapping = @@ -144,8 +145,8 @@ private TableSchemaInfo( } static Map sanityCheckAndGenerateIndexMapping( - com.alibaba.fluss.metadata.Schema inferredFlussSchema, - com.alibaba.fluss.metadata.Schema currentFlussNewSchema) { + org.apache.fluss.metadata.Schema inferredFlussSchema, + org.apache.fluss.metadata.Schema currentFlussNewSchema) { List inferredSchemaColumnNames = inferredFlussSchema.getColumnNames(); Map reverseIndex = new HashMap<>(); for (int i = 0; i < 
inferredSchemaColumnNames.size(); i++) { diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetaDataApplier.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetaDataApplier.java index 2e13b70d95c..4e28623e210 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetaDataApplier.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetaDataApplier.java @@ -26,14 +26,14 @@ import org.apache.flink.cdc.common.sink.MetadataApplier; import org.apache.flink.table.api.ValidationException; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.ConnectionFactory; -import com.alibaba.fluss.client.admin.Admin; -import com.alibaba.fluss.config.Configuration; -import com.alibaba.fluss.metadata.DatabaseDescriptor; -import com.alibaba.fluss.metadata.TableDescriptor; -import com.alibaba.fluss.metadata.TableInfo; -import com.alibaba.fluss.metadata.TablePath; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.metadata.DatabaseDescriptor; +import org.apache.fluss.metadata.TableDescriptor; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussArray.java 
b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussArray.java new file mode 100644 index 00000000000..4fa32e6a862 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussArray.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.fluss.sink.row; + +import org.apache.flink.cdc.common.data.ArrayData; +import org.apache.flink.cdc.common.data.TimestampData; + +import org.apache.fluss.row.BinaryString; +import org.apache.fluss.row.Decimal; +import org.apache.fluss.row.InternalArray; +import org.apache.fluss.row.InternalMap; +import org.apache.fluss.row.InternalRow; +import org.apache.fluss.row.TimestampLtz; +import org.apache.fluss.row.TimestampNtz; + +import static org.apache.flink.cdc.connectors.fluss.sink.row.CdcAsFlussRow.fromFlinkDecimal; + +/** Wraps a CDC {@link ArrayData} as a Fluss {@link InternalArray}. 
*/ +public class CdcAsFlussArray implements InternalArray { + + private final ArrayData flussArray; + + public CdcAsFlussArray(ArrayData flussArray) { + this.flussArray = flussArray; + } + + @Override + public int size() { + return flussArray.size(); + } + + @Override + public boolean[] toBooleanArray() { + return flussArray.toBooleanArray(); + } + + @Override + public byte[] toByteArray() { + return flussArray.toByteArray(); + } + + @Override + public short[] toShortArray() { + return flussArray.toShortArray(); + } + + @Override + public int[] toIntArray() { + return flussArray.toIntArray(); + } + + @Override + public long[] toLongArray() { + return flussArray.toLongArray(); + } + + @Override + public float[] toFloatArray() { + return flussArray.toFloatArray(); + } + + @Override + public double[] toDoubleArray() { + return flussArray.toDoubleArray(); + } + + @Override + public boolean isNullAt(int pos) { + return flussArray.isNullAt(pos); + } + + @Override + public boolean getBoolean(int pos) { + return flussArray.getBoolean(pos); + } + + @Override + public byte getByte(int pos) { + return flussArray.getByte(pos); + } + + @Override + public short getShort(int pos) { + return flussArray.getShort(pos); + } + + @Override + public int getInt(int pos) { + return flussArray.getInt(pos); + } + + @Override + public long getLong(int pos) { + return flussArray.getLong(pos); + } + + @Override + public float getFloat(int pos) { + return flussArray.getFloat(pos); + } + + @Override + public double getDouble(int pos) { + return flussArray.getDouble(pos); + } + + @Override + public BinaryString getChar(int pos, int length) { + return BinaryString.fromBytes(flussArray.getString(pos).toBytes()); + } + + @Override + public BinaryString getString(int pos) { + return BinaryString.fromBytes(flussArray.getString(pos).toBytes()); + } + + @Override + public Decimal getDecimal(int pos, int precision, int scale) { + return fromFlinkDecimal(flussArray.getDecimal(pos, precision, scale)); + } 
+ + @Override + public TimestampNtz getTimestampNtz(int pos, int precision) { + TimestampData timestamp = flussArray.getTimestamp(pos, precision); + return TimestampNtz.fromMillis( + timestamp.getMillisecond(), timestamp.getNanoOfMillisecond()); + } + + @Override + public TimestampLtz getTimestampLtz(int pos, int precision) { + TimestampData timestamp = flussArray.getTimestamp(pos, precision); + return TimestampLtz.fromEpochMillis( + timestamp.getMillisecond(), timestamp.getNanoOfMillisecond()); + } + + @Override + public byte[] getBinary(int pos, int length) { + return flussArray.getBinary(pos); + } + + @Override + public byte[] getBytes(int pos) { + return flussArray.getBinary(pos); + } + + @Override + public InternalArray getArray(int pos) { + return new CdcAsFlussArray(flussArray.getArray(pos)); + } + + @Override + public InternalMap getMap(int pos) { + return new CdcAsFlussMap(flussArray.getMap(pos)); + } + + @Override + public InternalRow getRow(int pos, int numFields) { + return CdcAsFlussRow.replace(flussArray.getRecord(pos, numFields)); + } +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussMap.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussMap.java new file mode 100644 index 00000000000..b209001eec4 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussMap.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.fluss.sink.row; + +import org.apache.flink.cdc.common.data.MapData; + +import org.apache.fluss.row.InternalArray; +import org.apache.fluss.row.InternalMap; + +/** Wraps a Cdc {@link MapData} as a Fluss {@link InternalMap}. */ +public class CdcAsFlussMap implements InternalMap { + + private final MapData cdcMap; + + public CdcAsFlussMap(MapData cdcMap) { + this.cdcMap = cdcMap; + } + + @Override + public int size() { + return cdcMap.size(); + } + + @Override + public InternalArray keyArray() { + return new CdcAsFlussArray(cdcMap.keyArray()); + } + + @Override + public InternalArray valueArray() { + return new CdcAsFlussArray(cdcMap.valueArray()); + } +} diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/CdcAsFlussRow.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussRow.java similarity index 87% rename from flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/CdcAsFlussRow.java rename to flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussRow.java 
index 5891d58a721..4a97ebc2245 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/CdcAsFlussRow.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/row/CdcAsFlussRow.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.fluss.sink; +package org.apache.flink.cdc.connectors.fluss.sink.row; import org.apache.flink.cdc.common.annotation.Internal; import org.apache.flink.cdc.common.annotation.VisibleForTesting; @@ -24,11 +24,13 @@ import org.apache.flink.cdc.common.data.RecordData; import org.apache.flink.cdc.common.data.TimestampData; -import com.alibaba.fluss.row.BinaryString; -import com.alibaba.fluss.row.Decimal; -import com.alibaba.fluss.row.InternalRow; -import com.alibaba.fluss.row.TimestampLtz; -import com.alibaba.fluss.row.TimestampNtz; +import org.apache.fluss.row.BinaryString; +import org.apache.fluss.row.Decimal; +import org.apache.fluss.row.InternalArray; +import org.apache.fluss.row.InternalMap; +import org.apache.fluss.row.InternalRow; +import org.apache.fluss.row.TimestampLtz; +import org.apache.fluss.row.TimestampNtz; import java.util.Map; import java.util.stream.Collectors; @@ -167,6 +169,22 @@ public byte[] getBytes(int pos) { return cdcRecord.getBinary(indexMapping.get(pos)); } + @Override + public InternalArray getArray(int i) { + return new CdcAsFlussArray(cdcRecord.getArray(indexMapping.get(i))); + } + + @Override + public InternalMap getMap(int i) { + return new CdcAsFlussMap(cdcRecord.getMap(indexMapping.get(i))); + } + + @Override + public InternalRow getRow(int i, int numFields) { + return new CdcAsFlussRow( + cdcRecord.getRow(indexMapping.get(i), numFields), numFields, indexMapping); + } + @VisibleForTesting public RecordData getCdcRecord() { return cdcRecord; diff --git 
a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEvent.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEvent.java index 75df49fa6d5..a31d07bd404 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEvent.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEvent.java @@ -17,7 +17,7 @@ package org.apache.flink.cdc.connectors.fluss.sink.v2; -import com.alibaba.fluss.metadata.TablePath; +import org.apache.fluss.metadata.TablePath; import java.util.List; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEventSerializer.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEventSerializer.java index dadf0d2ee14..c90cdc91369 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEventSerializer.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussEventSerializer.java @@ -17,7 +17,7 @@ package org.apache.flink.cdc.connectors.fluss.sink.v2; -import com.alibaba.fluss.client.Connection; +import org.apache.fluss.client.Connection; import java.io.IOException; import java.io.Serializable; diff --git 
a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussRowWithOp.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussRowWithOp.java index 0dedaf6cc52..2042dbdc440 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussRowWithOp.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussRowWithOp.java @@ -17,13 +17,13 @@ package org.apache.flink.cdc.connectors.fluss.sink.v2; -import com.alibaba.fluss.row.InternalRow; +import org.apache.fluss.row.InternalRow; import javax.annotation.Nullable; import java.util.Objects; -import static com.alibaba.fluss.utils.Preconditions.checkNotNull; +import static org.apache.fluss.utils.Preconditions.checkNotNull; /* This file is based on source code of Apache Fluss Project (https://fluss.apache.org/), licensed by the Apache * Software Foundation (ASF) under the Apache License, Version 2.0. 
See the NOTICE file distributed with this work for diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSink.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSink.java index 4e6d789e90b..0bd74f0601e 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSink.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSink.java @@ -22,7 +22,7 @@ import org.apache.flink.api.connector.sink2.WriterInitContext; import org.apache.flink.runtime.metrics.groups.InternalSinkWriterMetricGroup; -import com.alibaba.fluss.config.Configuration; +import org.apache.fluss.config.Configuration; import java.io.IOException; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkWriter.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkWriter.java index dab9e6ed56e..8a3754e0e9e 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkWriter.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkWriter.java @@ -23,18 +23,18 @@ import org.apache.flink.metrics.Counter; import org.apache.flink.metrics.groups.SinkWriterMetricGroup; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.ConnectionFactory; -import 
com.alibaba.fluss.client.table.Table; -import com.alibaba.fluss.client.table.writer.AppendWriter; -import com.alibaba.fluss.client.table.writer.TableWriter; -import com.alibaba.fluss.client.table.writer.UpsertWriter; -import com.alibaba.fluss.config.Configuration; -import com.alibaba.fluss.metadata.TablePath; -import com.alibaba.fluss.metrics.Gauge; -import com.alibaba.fluss.metrics.Metric; -import com.alibaba.fluss.metrics.MetricNames; -import com.alibaba.fluss.row.InternalRow; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.table.Table; +import org.apache.fluss.client.table.writer.AppendWriter; +import org.apache.fluss.client.table.writer.TableWriter; +import org.apache.fluss.client.table.writer.UpsertWriter; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.metrics.Gauge; +import org.apache.fluss.metrics.Metric; +import org.apache.fluss.metrics.MetricNames; +import org.apache.fluss.row.InternalRow; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussCounter.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussCounter.java index 61bcff7b098..48d09409603 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussCounter.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussCounter.java @@ -26,9 +26,9 @@ /** An implementation of Flink's {@link Counter} which wraps Fluss's Counter. 
*/ public class WrappedFlussCounter implements Counter { - private final com.alibaba.fluss.metrics.Counter flussCounter; + private final org.apache.fluss.metrics.Counter flussCounter; - public WrappedFlussCounter(com.alibaba.fluss.metrics.Counter flussCounter) { + public WrappedFlussCounter(org.apache.fluss.metrics.Counter flussCounter) { this.flussCounter = flussCounter; } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussGauge.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussGauge.java index 1b5df21d05b..45d7adaaf74 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussGauge.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrappedFlussGauge.java @@ -26,9 +26,9 @@ /** An implementation of Flink's {@link Gauge} which wraps Fluss's Gauge. 
*/ public class WrappedFlussGauge implements Gauge { - private final com.alibaba.fluss.metrics.Gauge flussGauge; + private final org.apache.fluss.metrics.Gauge flussGauge; - public WrappedFlussGauge(com.alibaba.fluss.metrics.Gauge flussGauge) { + public WrappedFlussGauge(org.apache.fluss.metrics.Gauge flussGauge) { this.flussGauge = flussGauge; } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussHistogram.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussHistogram.java index a62a646a348..402c5cc1aa8 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussHistogram.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussHistogram.java @@ -27,9 +27,9 @@ /** An implementation of Flink's {@link Histogram} which wraps Fluss's Histogram. 
*/ public class WrapperFlussHistogram implements Histogram { - private final com.alibaba.fluss.metrics.Histogram flussHistogram; + private final org.apache.fluss.metrics.Histogram flussHistogram; - public WrapperFlussHistogram(com.alibaba.fluss.metrics.Histogram flussHistogram) { + public WrapperFlussHistogram(org.apache.fluss.metrics.Histogram flussHistogram) { this.flussHistogram = flussHistogram; } @@ -50,10 +50,10 @@ public HistogramStatistics getStatistics() { private static class FlinkHistogramStatistics extends HistogramStatistics { - private final com.alibaba.fluss.metrics.HistogramStatistics flussHistogramStatistics; + private final org.apache.fluss.metrics.HistogramStatistics flussHistogramStatistics; public FlinkHistogramStatistics( - com.alibaba.fluss.metrics.HistogramStatistics flussHistogramStatistics) { + org.apache.fluss.metrics.HistogramStatistics flussHistogramStatistics) { this.flussHistogramStatistics = flussHistogramStatistics; } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMeter.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMeter.java index 40bac46a80a..0b415b20241 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMeter.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMeter.java @@ -26,9 +26,9 @@ /** An implementation of Flink's {@link Meter} which wraps Fluss's Meter. 
*/ public class WrapperFlussMeter implements Meter { - private final com.alibaba.fluss.metrics.Meter flussMeter; + private final org.apache.fluss.metrics.Meter flussMeter; - public WrapperFlussMeter(com.alibaba.fluss.metrics.Meter flussMeter) { + public WrapperFlussMeter(org.apache.fluss.metrics.Meter flussMeter) { this.flussMeter = flussMeter; } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMetricRegistry.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMetricRegistry.java index 4251a71d67d..39932bc2ef2 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMetricRegistry.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/v2/metrics/WrapperFlussMetricRegistry.java @@ -19,14 +19,14 @@ import org.apache.flink.metrics.MetricGroup; -import com.alibaba.fluss.metrics.CharacterFilter; -import com.alibaba.fluss.metrics.Counter; -import com.alibaba.fluss.metrics.Gauge; -import com.alibaba.fluss.metrics.Histogram; -import com.alibaba.fluss.metrics.Meter; -import com.alibaba.fluss.metrics.Metric; -import com.alibaba.fluss.metrics.groups.AbstractMetricGroup; -import com.alibaba.fluss.metrics.registry.MetricRegistry; +import org.apache.fluss.metrics.CharacterFilter; +import org.apache.fluss.metrics.Counter; +import org.apache.fluss.metrics.Gauge; +import org.apache.fluss.metrics.Histogram; +import org.apache.fluss.metrics.Meter; +import org.apache.fluss.metrics.Metric; +import org.apache.fluss.metrics.groups.AbstractMetricGroup; +import org.apache.fluss.metrics.registry.MetricRegistry; import 
java.util.Collections; import java.util.HashMap; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussConfigUtils.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConfigUtils.java similarity index 91% rename from flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussConfigUtils.java rename to flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConfigUtils.java index 000cabe08bc..6d850975648 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/sink/FlussConfigUtils.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConfigUtils.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.fluss.sink; +package org.apache.flink.cdc.connectors.fluss.utils; import javax.annotation.Nullable; @@ -24,7 +24,7 @@ import java.util.List; import java.util.Map; -/** Utils for parsing fluss yaml sink options. */ +/** Utils for parsing fluss Yaml sink options. 
*/ public class FlussConfigUtils { public static Map> parseBucketKeys(@Nullable String rawValue) throws IllegalArgumentException { @@ -66,11 +66,8 @@ public static Map parseBucketNumber(@Nullable String rawValue) throw new IllegalArgumentException( "Invalid bucket number configuration: " + rawValue); } - - String table = kv[0].trim(); try { - int value = Integer.parseInt(kv[1].trim()); - result.put(table, value); + result.put(kv[0].trim(), Integer.valueOf(kv[1].trim())); } catch (NumberFormatException ignored) { throw new IllegalArgumentException( "Invalid bucket number configuration: " + rawValue); @@ -78,4 +75,6 @@ public static Map parseBucketNumber(@Nullable String rawValue) } return result; } + + private FlussConfigUtils() {} } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversions.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversions.java index eb311dd0e7d..25a9f53b676 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversions.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/main/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversions.java @@ -22,6 +22,7 @@ import org.apache.flink.cdc.common.types.BinaryType; import org.apache.flink.cdc.common.types.BooleanType; import org.apache.flink.cdc.common.types.CharType; +import org.apache.flink.cdc.common.types.DataType; import org.apache.flink.cdc.common.types.DateType; import org.apache.flink.cdc.common.types.DecimalType; import org.apache.flink.cdc.common.types.DoubleType; @@ -37,11 +38,12 @@ import org.apache.flink.cdc.common.types.VarBinaryType; import org.apache.flink.cdc.common.types.VarCharType; import 
org.apache.flink.cdc.common.types.ZonedTimestampType; +import org.apache.flink.cdc.common.utils.Preconditions; import org.apache.flink.util.CollectionUtil; -import com.alibaba.fluss.annotation.VisibleForTesting; -import com.alibaba.fluss.metadata.Schema; -import com.alibaba.fluss.metadata.TableDescriptor; +import org.apache.fluss.annotation.VisibleForTesting; +import org.apache.fluss.metadata.Schema; +import org.apache.fluss.metadata.TableDescriptor; import javax.annotation.Nullable; @@ -85,7 +87,7 @@ public static TableDescriptor toFlussTable( .build(); } - public static com.alibaba.fluss.metadata.Schema toFlussSchema( + public static org.apache.fluss.metadata.Schema toFlussSchema( org.apache.flink.cdc.common.schema.Schema cdcSchema) { Schema.Builder schemBuilder = Schema.newBuilder(); if (!CollectionUtil.isNullOrEmpty(cdcSchema.primaryKeys())) { @@ -108,7 +110,7 @@ public static com.alibaba.fluss.metadata.Schema toFlussSchema( } @VisibleForTesting - private static com.alibaba.fluss.types.DataType toFlussType( + private static org.apache.fluss.types.DataType toFlussType( org.apache.flink.cdc.common.types.DataType flinkDataType) { return flinkDataType.accept(TO_FLUSS_TYPE_INSTANCE); } @@ -137,119 +139,128 @@ public static Boolean sameCdcColumnsIgnoreCommentAndDefaultValue( private static class CdcTypeToFlussType implements org.apache.flink.cdc.common.types.DataTypeVisitor< - com.alibaba.fluss.types.DataType> { + org.apache.fluss.types.DataType> { @Override - public com.alibaba.fluss.types.DataType visit(CharType charType) { - return new com.alibaba.fluss.types.CharType( - charType.isNullable(), charType.getLength()); + public org.apache.fluss.types.DataType visit(CharType charType) { + return new org.apache.fluss.types.CharType(charType.isNullable(), charType.getLength()); } @Override - public com.alibaba.fluss.types.DataType visit(VarCharType varCharType) { + public org.apache.fluss.types.DataType visit(VarCharType varCharType) { // fluss not support varchar type 
- return new com.alibaba.fluss.types.StringType(varCharType.isNullable()); + return new org.apache.fluss.types.StringType(varCharType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(BooleanType booleanType) { - return new com.alibaba.fluss.types.BooleanType(booleanType.isNullable()); + public org.apache.fluss.types.DataType visit(BooleanType booleanType) { + return new org.apache.fluss.types.BooleanType(booleanType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(BinaryType binaryType) { - return new com.alibaba.fluss.types.BinaryType( + public org.apache.fluss.types.DataType visit(BinaryType binaryType) { + return new org.apache.fluss.types.BinaryType( binaryType.isNullable(), binaryType.getLength()); } @Override - public com.alibaba.fluss.types.DataType visit(VarBinaryType varBinaryType) { + public org.apache.fluss.types.DataType visit(VarBinaryType varBinaryType) { // fluss not support varbinary type - return new com.alibaba.fluss.types.BytesType(varBinaryType.isNullable()); + return new org.apache.fluss.types.BytesType(varBinaryType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(DecimalType decimalType) { - return new com.alibaba.fluss.types.DecimalType( + public org.apache.fluss.types.DataType visit(DecimalType decimalType) { + return new org.apache.fluss.types.DecimalType( decimalType.isNullable(), decimalType.getPrecision(), decimalType.getScale()); } @Override - public com.alibaba.fluss.types.DataType visit(TinyIntType tinyIntType) { - return new com.alibaba.fluss.types.TinyIntType(tinyIntType.isNullable()); + public org.apache.fluss.types.DataType visit(TinyIntType tinyIntType) { + return new org.apache.fluss.types.TinyIntType(tinyIntType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(SmallIntType smallIntType) { - return new com.alibaba.fluss.types.SmallIntType(smallIntType.isNullable()); + public org.apache.fluss.types.DataType visit(SmallIntType 
smallIntType) { + return new org.apache.fluss.types.SmallIntType(smallIntType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(IntType intType) { - return new com.alibaba.fluss.types.IntType(intType.isNullable()); + public org.apache.fluss.types.DataType visit(IntType intType) { + return new org.apache.fluss.types.IntType(intType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(BigIntType bigIntType) { - return new com.alibaba.fluss.types.BigIntType(bigIntType.isNullable()); + public org.apache.fluss.types.DataType visit(BigIntType bigIntType) { + return new org.apache.fluss.types.BigIntType(bigIntType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(FloatType floatType) { - return new com.alibaba.fluss.types.FloatType(floatType.isNullable()); + public org.apache.fluss.types.DataType visit(FloatType floatType) { + return new org.apache.fluss.types.FloatType(floatType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(DoubleType doubleType) { - return new com.alibaba.fluss.types.DoubleType(doubleType.isNullable()); + public org.apache.fluss.types.DataType visit(DoubleType doubleType) { + return new org.apache.fluss.types.DoubleType(doubleType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(DateType dateType) { - return new com.alibaba.fluss.types.DateType(dateType.isNullable()); + public org.apache.fluss.types.DataType visit(DateType dateType) { + return new org.apache.fluss.types.DateType(dateType.isNullable()); } @Override - public com.alibaba.fluss.types.DataType visit(TimeType timeType) { - return new com.alibaba.fluss.types.TimeType( + public org.apache.fluss.types.DataType visit(TimeType timeType) { + return new org.apache.fluss.types.TimeType( timeType.isNullable(), timeType.getPrecision()); } @Override - public com.alibaba.fluss.types.DataType visit(TimestampType timestampType) { - return new com.alibaba.fluss.types.TimestampType( 
+ public org.apache.fluss.types.DataType visit(TimestampType timestampType) { + return new org.apache.fluss.types.TimestampType( timestampType.isNullable(), timestampType.getPrecision()); } @Override - public com.alibaba.fluss.types.DataType visit(ZonedTimestampType zonedTimestampType) { + public org.apache.fluss.types.DataType visit(ZonedTimestampType zonedTimestampType) { throw new UnsupportedOperationException( "Unsupported data type in fluss " + zonedTimestampType); } @Override - public com.alibaba.fluss.types.DataType visit( + public org.apache.fluss.types.DataType visit( LocalZonedTimestampType localZonedTimestampType) { - return new com.alibaba.fluss.types.LocalZonedTimestampType( + return new org.apache.fluss.types.LocalZonedTimestampType( localZonedTimestampType.isNullable(), localZonedTimestampType.getPrecision()); } @Override - public com.alibaba.fluss.types.DataType visit(ArrayType arrayType) { - throw new UnsupportedOperationException( - "Unsupported data type in fluss version under 0.7: " + arrayType); + public org.apache.fluss.types.DataType visit(ArrayType arrayType) { + List children = arrayType.getChildren(); + Preconditions.checkState(!children.isEmpty()); + org.apache.fluss.types.DataType flussChildType = toFlussType(children.get(0)); + return new org.apache.fluss.types.ArrayType(arrayType.isNullable(), flussChildType); } @Override - public com.alibaba.fluss.types.DataType visit(MapType mapType) { - throw new UnsupportedOperationException( - "Unsupported data type in fluss version under 0.7: " + mapType); + public org.apache.fluss.types.DataType visit(MapType mapType) { + org.apache.fluss.types.DataType flussKeyType = toFlussType(mapType.getKeyType()); + org.apache.fluss.types.DataType flussValueType = toFlussType(mapType.getValueType()); + return new org.apache.fluss.types.MapType( + mapType.isNullable(), flussKeyType, flussValueType); } @Override - public com.alibaba.fluss.types.DataType visit(RowType rowType) { - throw new 
UnsupportedOperationException( - "Unsupported data type in fluss version under 0.7: " + rowType); + public org.apache.fluss.types.DataType visit(RowType rowType) { + return new org.apache.fluss.types.RowType( + rowType.isNullable(), + rowType.getFields().stream() + .map( + field -> + new org.apache.fluss.types.DataField( + field.getName(), field.getType().accept(this))) + .collect(Collectors.toList())); } } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/FlussPipelineITCase.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/FlussPipelineITCase.java index a625f0fd9b9..bfc71a1adde 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/FlussPipelineITCase.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/FlussPipelineITCase.java @@ -45,10 +45,12 @@ import org.apache.flink.types.Row; import org.apache.flink.util.CloseableIterator; -import com.alibaba.fluss.config.ConfigOptions; -import com.alibaba.fluss.config.MemorySize; -import com.alibaba.fluss.metadata.DataLakeFormat; -import com.alibaba.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.MemorySize; +import org.apache.fluss.metadata.DataLakeFormat; +import org.apache.fluss.server.testutils.FlussClusterExtension; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -62,12 +64,12 @@ import java.util.List; import java.util.Map; -import static com.alibaba.fluss.config.ConfigOptions.BOOTSTRAP_SERVERS; -import 
static com.alibaba.fluss.flink.source.testutils.FlinkRowAssertionsUtils.assertResultsIgnoreOrder; -import static com.alibaba.fluss.server.testutils.FlussClusterExtension.BUILTIN_DATABASE; import static org.apache.flink.cdc.connectors.values.source.ValuesDataSourceHelper.TABLE_1; import static org.apache.flink.cdc.connectors.values.source.ValuesDataSourceHelper.TABLE_2; import static org.apache.flink.configuration.CoreOptions.ALWAYS_PARENT_FIRST_LOADER_PATTERNS_ADDITIONAL; +import static org.apache.fluss.config.ConfigOptions.BOOTSTRAP_SERVERS; +import static org.apache.fluss.flink.source.testutils.FlinkRowAssertionsUtils.assertResultsIgnoreOrder; +import static org.apache.fluss.server.testutils.FlussClusterExtension.BUILTIN_DATABASE; import static org.assertj.core.api.Assertions.assertThatThrownBy; /** ITCase for Fluss Pipeline. */ @@ -117,7 +119,8 @@ public class FlussPipelineITCase { protected TableEnvironment tBatchEnv; @BeforeEach - void before() { + void before() throws Exception { + waitForFlussClusterReady(); // open a catalog so that we can get table from the catalog String bootstrapServers = FLUSS_CLUSTER_EXTENSION.getBootstrapServers(); @@ -137,6 +140,27 @@ void before() { tBatchEnv.useDatabase(DEFAULT_DB); } + private void waitForFlussClusterReady() throws Exception { + int maxRetries = 30; + int retryIntervalMs = 1000; + Exception lastException = null; + + for (int i = 0; i < maxRetries; i++) { + try (Connection connection = + ConnectionFactory.createConnection(FLUSS_CLUSTER_EXTENSION.getClientConfig())) { + // Connection successful, cluster is ready + return; + } catch (Exception e) { + lastException = e; + Thread.sleep(retryIntervalMs); + } + } + + throw new IllegalStateException( + "Failed to connect to Fluss cluster after " + maxRetries + " attempts", + lastException); + } + @AfterEach void after() { tBatchEnv.useDatabase(BUILTIN_DATABASE); @@ -296,7 +320,7 @@ void testWrongTableOptions() { ValuesDataSourceHelper.singleSplitSingleTable(), 
sinkOption)) .rootCause() - .hasMessageContaining("'table.non-key' is not a Fluss table property"); + .hasMessageContaining("'table.non-key' is not a recognized Fluss table property"); } @Test @@ -542,8 +566,8 @@ private void checkResult(TableId tableId, List expectedRows) { assertResultsIgnoreOrder(rowIter, expectedRows, true); } - private static com.alibaba.fluss.config.Configuration initConfig() { - com.alibaba.fluss.config.Configuration conf = new com.alibaba.fluss.config.Configuration(); + private static org.apache.fluss.config.Configuration initConfig() { + org.apache.fluss.config.Configuration conf = new org.apache.fluss.config.Configuration(); conf.setInt(ConfigOptions.DEFAULT_REPLICATION_FACTOR, 3); // set a shorter interval for testing purpose conf.set(ConfigOptions.KV_SNAPSHOT_INTERVAL, Duration.ofSeconds(1)); @@ -560,7 +584,7 @@ private static com.alibaba.fluss.config.Configuration initConfig() { conf.setString("security.sasl.enabled.mechanisms", "plain"); conf.setString( "security.sasl.plain.jaas.config", - "com.alibaba.fluss.security.auth.sasl.plain.PlainLoginModule required " + "org.apache.fluss.security.auth.sasl.plain.PlainLoginModule required " + " user_root=\"password\" " + " user_guest=\"password2\";"); return conf; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchemaTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchemaTest.java index 7097a72ee45..1af59aa9792 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchemaTest.java +++ 
b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussEventSerializationSchemaTest.java @@ -36,15 +36,16 @@ import org.apache.flink.cdc.common.types.LocalZonedTimestampType; import org.apache.flink.cdc.common.types.TimestampType; import org.apache.flink.cdc.common.types.VarCharType; +import org.apache.flink.cdc.connectors.fluss.sink.row.CdcAsFlussRow; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussEvent; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussOperationType; import org.apache.flink.cdc.connectors.fluss.sink.v2.FlussRowWithOp; import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.ConnectionFactory; -import com.alibaba.fluss.metadata.TablePath; -import com.alibaba.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.server.testutils.FlussClusterExtension; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetadataApplierTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetadataApplierTest.java index 9c35f002db7..c95aa786818 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetadataApplierTest.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/FlussMetadataApplierTest.java 
@@ -24,15 +24,15 @@ import org.apache.flink.cdc.common.types.DataTypes; import org.apache.flink.cdc.common.types.IntType; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.ConnectionFactory; -import com.alibaba.fluss.client.admin.Admin; -import com.alibaba.fluss.exception.InvalidConfigException; -import com.alibaba.fluss.metadata.TableDescriptor; -import com.alibaba.fluss.metadata.TableInfo; -import com.alibaba.fluss.metadata.TablePath; -import com.alibaba.fluss.server.testutils.FlussClusterExtension; -import com.alibaba.fluss.types.RowType; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.exception.InvalidConfigException; +import org.apache.fluss.metadata.TableDescriptor; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.types.RowType; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -47,7 +47,7 @@ import java.util.Map; import java.util.concurrent.ExecutionException; -import static com.alibaba.fluss.config.ConfigOptions.TABLE_REPLICATION_FACTOR; +import static org.apache.fluss.config.ConfigOptions.TABLE_REPLICATION_FACTOR; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -129,28 +129,28 @@ void testCreateTableAllTypes(boolean primaryKeyTable) throws Exception { DataTypes.TIMESTAMP_LTZ(6) }; - com.alibaba.fluss.types.DataType[] flussDataTypes = - new com.alibaba.fluss.types.DataType[] { - com.alibaba.fluss.types.DataTypes.BINARY(10), + org.apache.fluss.types.DataType[] flussDataTypes = + new org.apache.fluss.types.DataType[] { + org.apache.fluss.types.DataTypes.BINARY(10), // fluss not support binary, will be mapped to bytes - 
com.alibaba.fluss.types.DataTypes.BYTES(), - com.alibaba.fluss.types.DataTypes.BYTES(), - com.alibaba.fluss.types.DataTypes.BOOLEAN(), - com.alibaba.fluss.types.DataTypes.TINYINT(), - com.alibaba.fluss.types.DataTypes.SMALLINT(), - new com.alibaba.fluss.types.IntType(false), - com.alibaba.fluss.types.DataTypes.BIGINT(), - com.alibaba.fluss.types.DataTypes.FLOAT(), - com.alibaba.fluss.types.DataTypes.DOUBLE(), - com.alibaba.fluss.types.DataTypes.DECIMAL(38, 18), - com.alibaba.fluss.types.DataTypes.CHAR(10), + org.apache.fluss.types.DataTypes.BYTES(), + org.apache.fluss.types.DataTypes.BYTES(), + org.apache.fluss.types.DataTypes.BOOLEAN(), + org.apache.fluss.types.DataTypes.TINYINT(), + org.apache.fluss.types.DataTypes.SMALLINT(), + new org.apache.fluss.types.IntType(false), + org.apache.fluss.types.DataTypes.BIGINT(), + org.apache.fluss.types.DataTypes.FLOAT(), + org.apache.fluss.types.DataTypes.DOUBLE(), + org.apache.fluss.types.DataTypes.DECIMAL(38, 18), + org.apache.fluss.types.DataTypes.CHAR(10), // fluss not support varchar, will be mapped to string - com.alibaba.fluss.types.DataTypes.STRING(), - com.alibaba.fluss.types.DataTypes.STRING(), - com.alibaba.fluss.types.DataTypes.DATE(), - com.alibaba.fluss.types.DataTypes.TIME(), - com.alibaba.fluss.types.DataTypes.TIMESTAMP(3), - com.alibaba.fluss.types.DataTypes.TIMESTAMP_LTZ(6) + org.apache.fluss.types.DataTypes.STRING(), + org.apache.fluss.types.DataTypes.STRING(), + org.apache.fluss.types.DataTypes.DATE(), + org.apache.fluss.types.DataTypes.TIME(), + org.apache.fluss.types.DataTypes.TIMESTAMP(3), + org.apache.fluss.types.DataTypes.TIMESTAMP_LTZ(6) }; try (FlussMetaDataApplier applier = @@ -188,17 +188,10 @@ void testCreateTableAllTypes(boolean primaryKeyTable) throws Exception { @Test void testUnsupportedType() throws Exception { - String[] fieldNames = new String[] {"timestamp_tz_col", "array_col", "map_col", "row_col"}; + String[] fieldNames = new String[] {"timestamp_tz_col"}; 
org.apache.flink.cdc.common.types.DataType[] cdcDataTypes = - new org.apache.flink.cdc.common.types.DataType[] { - DataTypes.ARRAY(DataTypes.STRING()), - DataTypes.MAP(DataTypes.STRING(), DataTypes.INT()), - DataTypes.ROW( - DataTypes.FIELD("name", DataTypes.STRING()), - DataTypes.FIELD("age", DataTypes.INT())), - DataTypes.TIMESTAMP_TZ() - }; + new org.apache.flink.cdc.common.types.DataType[] {DataTypes.TIMESTAMP_TZ()}; try (FlussMetaDataApplier applier = new FlussMetaDataApplier( @@ -227,10 +220,10 @@ void testDropTableEvent() throws Exception { tablePath, TableDescriptor.builder() .schema( - com.alibaba.fluss.metadata.Schema.newBuilder() + org.apache.fluss.metadata.Schema.newBuilder() .column( "id", - com.alibaba.fluss.types.DataTypes.INT()) + org.apache.fluss.types.DataTypes.INT()) .build()) .build(), true) diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkITCase.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkITCase.java index e65a7d199eb..1cd80c6a6b5 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkITCase.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/sink/v2/FlussSinkITCase.java @@ -40,9 +40,9 @@ import org.apache.flink.types.Row; import org.apache.flink.util.CloseableIterator; -import com.alibaba.fluss.client.Connection; -import com.alibaba.fluss.client.ConnectionFactory; -import com.alibaba.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.server.testutils.FlussClusterExtension; import 
org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -58,9 +58,9 @@ import java.util.Collections; import java.util.List; -import static com.alibaba.fluss.config.ConfigOptions.BOOTSTRAP_SERVERS; -import static com.alibaba.fluss.flink.source.testutils.FlinkRowAssertionsUtils.assertResultsIgnoreOrder; -import static com.alibaba.fluss.server.testutils.FlussClusterExtension.BUILTIN_DATABASE; +import static org.apache.fluss.config.ConfigOptions.BOOTSTRAP_SERVERS; +import static org.apache.fluss.flink.source.testutils.FlinkRowAssertionsUtils.assertResultsIgnoreOrder; +import static org.apache.fluss.server.testutils.FlussClusterExtension.BUILTIN_DATABASE; import static org.assertj.core.api.Assertions.assertThatThrownBy; /** Integration tests for FlussSink. */ diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversionsTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversionsTest.java index 2f3487528e7..eddaeb6ff59 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversionsTest.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-fluss/src/test/java/org/apache/flink/cdc/connectors/fluss/utils/FlussConversionsTest.java @@ -20,8 +20,8 @@ import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; -import com.alibaba.fluss.metadata.TableDescriptor; -import com.alibaba.fluss.types.RowType; +import org.apache.fluss.metadata.TableDescriptor; +import org.apache.fluss.types.RowType; import org.junit.jupiter.api.Test; import java.util.Arrays; @@ -48,7 +48,7 @@ void testToFlussSchemaBasic() { .primaryKey("id") .build(); - 
com.alibaba.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); + org.apache.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); assertThat(flussSchema.getColumnNames()).containsExactly("id", "name"); assertThat(flussSchema.getPrimaryKeyColumnNames()).containsExactly("id"); @@ -62,7 +62,7 @@ void testToFlussSchemaWithoutPrimaryKey() { .physicalColumn("name", DataTypes.STRING()) .build(); - com.alibaba.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); + org.apache.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); assertThat(flussSchema.getColumnNames()).hasSize(2); assertThat(flussSchema.getPrimaryKeyColumnNames()).isEmpty(); @@ -88,33 +88,43 @@ void testToFlussSchemaTypeConversions() { .physicalColumn("time_col", DataTypes.TIME()) .physicalColumn("timestamp_col", DataTypes.TIMESTAMP(3)) .physicalColumn("ltz_col", DataTypes.TIMESTAMP_LTZ(6)) + .physicalColumn("arr", DataTypes.ARRAY(DataTypes.INT())) + .physicalColumn("map", DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())) .build(); - com.alibaba.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); + org.apache.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); RowType rowType = flussSchema.getRowType(); - assertThat(rowType.getFieldCount()).isEqualTo(16); - - assertThat(rowType.getTypeAt(0)).isEqualTo(com.alibaba.fluss.types.DataTypes.BOOLEAN()); - assertThat(rowType.getTypeAt(1)).isEqualTo(com.alibaba.fluss.types.DataTypes.TINYINT()); - assertThat(rowType.getTypeAt(2)).isEqualTo(com.alibaba.fluss.types.DataTypes.SMALLINT()); - assertThat(rowType.getTypeAt(3)).isEqualTo(com.alibaba.fluss.types.DataTypes.INT()); - assertThat(rowType.getTypeAt(4)).isEqualTo(com.alibaba.fluss.types.DataTypes.BIGINT()); - assertThat(rowType.getTypeAt(5)).isEqualTo(com.alibaba.fluss.types.DataTypes.FLOAT()); - 
assertThat(rowType.getTypeAt(6)).isEqualTo(com.alibaba.fluss.types.DataTypes.DOUBLE()); - assertThat(rowType.getTypeAt(7)) - .isEqualTo(com.alibaba.fluss.types.DataTypes.DECIMAL(10, 2)); - assertThat(rowType.getTypeAt(8)).isEqualTo(com.alibaba.fluss.types.DataTypes.CHAR(10)); + assertThat(rowType.getFieldCount()).isEqualTo(18); + + assertThat(rowType.getTypeAt(0)).isEqualTo(org.apache.fluss.types.DataTypes.BOOLEAN()); + assertThat(rowType.getTypeAt(1)).isEqualTo(org.apache.fluss.types.DataTypes.TINYINT()); + assertThat(rowType.getTypeAt(2)).isEqualTo(org.apache.fluss.types.DataTypes.SMALLINT()); + assertThat(rowType.getTypeAt(3)).isEqualTo(org.apache.fluss.types.DataTypes.INT()); + assertThat(rowType.getTypeAt(4)).isEqualTo(org.apache.fluss.types.DataTypes.BIGINT()); + assertThat(rowType.getTypeAt(5)).isEqualTo(org.apache.fluss.types.DataTypes.FLOAT()); + assertThat(rowType.getTypeAt(6)).isEqualTo(org.apache.fluss.types.DataTypes.DOUBLE()); + assertThat(rowType.getTypeAt(7)).isEqualTo(org.apache.fluss.types.DataTypes.DECIMAL(10, 2)); + assertThat(rowType.getTypeAt(8)).isEqualTo(org.apache.fluss.types.DataTypes.CHAR(10)); // VarChar maps to StringType in Fluss - assertThat(rowType.getTypeAt(9)).isEqualTo(com.alibaba.fluss.types.DataTypes.STRING()); - assertThat(rowType.getTypeAt(10)).isEqualTo(com.alibaba.fluss.types.DataTypes.BINARY(16)); + assertThat(rowType.getTypeAt(9)).isEqualTo(org.apache.fluss.types.DataTypes.STRING()); + assertThat(rowType.getTypeAt(10)).isEqualTo(org.apache.fluss.types.DataTypes.BINARY(16)); // VarBinary maps to BytesType in Fluss - assertThat(rowType.getTypeAt(11)).isEqualTo(com.alibaba.fluss.types.DataTypes.BYTES()); - assertThat(rowType.getTypeAt(12)).isEqualTo(com.alibaba.fluss.types.DataTypes.DATE()); - assertThat(rowType.getTypeAt(13)).isEqualTo(com.alibaba.fluss.types.DataTypes.TIME()); - assertThat(rowType.getTypeAt(14)).isEqualTo(com.alibaba.fluss.types.DataTypes.TIMESTAMP(3)); + 
assertThat(rowType.getTypeAt(11)).isEqualTo(org.apache.fluss.types.DataTypes.BYTES()); + assertThat(rowType.getTypeAt(12)).isEqualTo(org.apache.fluss.types.DataTypes.DATE()); + assertThat(rowType.getTypeAt(13)).isEqualTo(org.apache.fluss.types.DataTypes.TIME()); + assertThat(rowType.getTypeAt(14)).isEqualTo(org.apache.fluss.types.DataTypes.TIMESTAMP(3)); assertThat(rowType.getTypeAt(15)) - .isEqualTo(com.alibaba.fluss.types.DataTypes.TIMESTAMP_LTZ(6)); + .isEqualTo(org.apache.fluss.types.DataTypes.TIMESTAMP_LTZ(6)); + assertThat(rowType.getTypeAt(16)) + .isEqualTo( + org.apache.fluss.types.DataTypes.ARRAY( + org.apache.fluss.types.DataTypes.INT())); + assertThat(rowType.getTypeAt(17)) + .isEqualTo( + org.apache.fluss.types.DataTypes.MAP( + org.apache.fluss.types.DataTypes.STRING(), + org.apache.fluss.types.DataTypes.INT())); } @Test @@ -126,7 +136,7 @@ void testToFlussSchemaTypeNullability() { .physicalColumn("not_null_col", DataTypes.INT().notNull()) .build(); - com.alibaba.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); + org.apache.fluss.metadata.Schema flussSchema = FlussConversions.toFlussSchema(cdcSchema); RowType rowType = flussSchema.getRowType(); assertThat(rowType.getTypeAt(0).isNullable()).isTrue(); @@ -146,28 +156,6 @@ void testToFlussSchemaUnsupportedZonedTimestamp() { .hasMessageContaining("Unsupported data type in fluss"); } - @Test - void testToFlussSchemaUnsupportedArrayType() { - Schema cdcSchema = - Schema.newBuilder().physicalColumn("arr", DataTypes.ARRAY(DataTypes.INT())).build(); - - assertThatThrownBy(() -> FlussConversions.toFlussSchema(cdcSchema)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageContaining("Unsupported data type in fluss"); - } - - @Test - void testToFlussSchemaUnsupportedMapType() { - Schema cdcSchema = - Schema.newBuilder() - .physicalColumn("map", DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())) - .build(); - - assertThatThrownBy(() -> 
FlussConversions.toFlussSchema(cdcSchema)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageContaining("Unsupported data type in fluss"); - } - // -------------------------------------------------------------------------------------------- // Tests for toFlussTable // -------------------------------------------------------------------------------------------- diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-iceberg/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-iceberg/pom.xml index 2d166ae2868..617bb8e16e1 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-iceberg/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-iceberg/pom.xml @@ -56,6 +56,10 @@ limitations under the License. junit-vintage-engine org.junit.vintage + + org.testcontainers + testcontainers + @@ -76,6 +80,12 @@ limitations under the License. flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-kafka/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-kafka/pom.xml index dee1f2f460f..6b0399b5e0d 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-kafka/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-kafka/pom.xml @@ -74,6 +74,12 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/pom.xml index 371b1dacfa6..ec528cde751 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/pom.xml @@ -94,6 +94,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -101,6 +107,12 @@ limitations under the License. flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oceanbase/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oceanbase/pom.xml index 2898c58b536..5ae8b6c4cb0 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oceanbase/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oceanbase/pom.xml @@ -77,6 +77,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/pom.xml index b091fbfc479..674b32284c2 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/pom.xml @@ -68,6 +68,12 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -75,6 +81,12 @@ limitations under the License. flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/PaimonEventSink.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/PaimonEventSink.java index eb2af9f430e..9487a3cc22e 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/PaimonEventSink.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/PaimonEventSink.java @@ -69,6 +69,7 @@ public DataStream addPreWriteTopology(DataStream dataStream) { if (event instanceof BucketWrapperChangeEvent) { // Add hash of tableId to avoid data skew. 
return ((BucketWrapperChangeEvent) event).getBucket() + + ((BucketWrapperChangeEvent) event).getPartition() + ((BucketWrapperChangeEvent) event).tableId().hashCode(); } else { return ((BucketWrapper) event).getBucket(); diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketAssignOperator.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketAssignOperator.java index 1e2dd65a1c1..faa832fc705 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketAssignOperator.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketAssignOperator.java @@ -157,6 +157,7 @@ public void processElement(StreamRecord streamRecord) throws Exception { bucketAssignerMap.computeIfAbsent( dataChangeEvent.tableId(), this::getTableInfo); int bucket; + int partition; GenericRow genericRow = PaimonWriterHelper.convertEventToGenericRow( dataChangeEvent, @@ -171,17 +172,20 @@ public void processElement(StreamRecord streamRecord) throws Exception { tuple4.f2.assign( tuple4.f3.partition(genericRow), tuple4.f3.trimmedPrimaryKey(genericRow).hashCode()); + partition = tuple4.f3.partition(genericRow).hashCode(); break; } case HASH_FIXED: { tuple4.f1.setRecord(genericRow); bucket = tuple4.f1.bucket(); + partition = tuple4.f1.partition().hashCode(); break; } case BUCKET_UNAWARE: { bucket = 0; + partition = 0; break; } case KEY_DYNAMIC: @@ -191,7 +195,8 @@ public void processElement(StreamRecord streamRecord) throws Exception { } } output.collect( - new StreamRecord<>(new BucketWrapperChangeEvent(bucket, dataChangeEvent))); + new StreamRecord<>( + new 
BucketWrapperChangeEvent(bucket, partition, dataChangeEvent))); } else { // Broadcast SchemachangeEvent. for (int index = 0; index < totalTasksNumber; index++) { @@ -199,6 +204,7 @@ public void processElement(StreamRecord streamRecord) throws Exception { new StreamRecord<>( new BucketWrapperChangeEvent( index, + 0, convertSchemaChangeEvent((SchemaChangeEvent) event)))); } } @@ -279,6 +285,7 @@ public DataChangeEvent convertDataChangeEvent(DataChangeEvent dataChangeEvent) new StreamRecord<>( new BucketWrapperChangeEvent( index, + 0, new CreateTableEvent( tableId, mixedSchemaInfo.paimonSchemaInfo diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperChangeEvent.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperChangeEvent.java index 6c0b3c155ad..95e3888fc57 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperChangeEvent.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperChangeEvent.java @@ -27,11 +27,13 @@ public class BucketWrapperChangeEvent implements ChangeEvent, BucketWrapper, Serializable { private static final long serialVersionUID = 1L; private final int bucket; + private final int partition; private final ChangeEvent innerEvent; - public BucketWrapperChangeEvent(int bucket, ChangeEvent innerEvent) { + public BucketWrapperChangeEvent(int bucket, int partition, ChangeEvent innerEvent) { this.bucket = bucket; + this.partition = partition; this.innerEvent = innerEvent; } @@ -39,6 +41,10 @@ public int getBucket() { return bucket; } + public int getPartition() { + return 
partition; + } + public ChangeEvent getInnerEvent() { return innerEvent; } @@ -57,12 +63,14 @@ public boolean equals(Object o) { return false; } BucketWrapperChangeEvent that = (BucketWrapperChangeEvent) o; - return bucket == that.bucket && Objects.equals(innerEvent, that.innerEvent); + return bucket == that.bucket + && partition == that.partition + && Objects.equals(innerEvent, that.innerEvent); } @Override public int hashCode() { - return Objects.hash(bucket, innerEvent); + return Objects.hash(bucket, partition, innerEvent); } @Override @@ -70,6 +78,8 @@ public String toString() { return "BucketWrapperChangeEvent{" + "bucket=" + bucket + + ", partition=" + + partition + ", innerEvent=" + innerEvent + '}'; diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperEventSerializer.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperEventSerializer.java index 267500988c5..31d4452f9b8 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperEventSerializer.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-paimon/src/main/java/org/apache/flink/cdc/connectors/paimon/sink/v2/bucket/BucketWrapperEventSerializer.java @@ -82,6 +82,7 @@ public void serialize(Event event, DataOutputView dataOutputView) throws IOExcep BucketWrapperChangeEvent bucketWrapperChangeEvent = (BucketWrapperChangeEvent) event; enumSerializer.serialize(EventClass.BUCKET_WRAPPER_CHANGE_EVENT, dataOutputView); dataOutputView.writeInt(bucketWrapperChangeEvent.getBucket()); + dataOutputView.writeInt(bucketWrapperChangeEvent.getPartition()); eventSerializer.serialize(bucketWrapperChangeEvent.getInnerEvent(), 
dataOutputView); } else if (event instanceof BucketWrapperFlushEvent) { enumSerializer.serialize(EventClass.BUCKET_WRAPPER_FLUSH_EVENT, dataOutputView); @@ -107,7 +108,9 @@ public Event deserialize(DataInputView source) throws IOException { schemaChangeEventTypeEnumSerializer.deserialize(source)); } else { return new BucketWrapperChangeEvent( - source.readInt(), (ChangeEvent) eventSerializer.deserialize(source)); + source.readInt(), + source.readInt(), + (ChangeEvent) eventSerializer.deserialize(source)); } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/pom.xml index 528d8586084..0e45ea59733 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/pom.xml @@ -81,6 +81,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -88,6 +94,12 @@ limitations under the License. 
flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java index 40b87f5d4e0..acecc2bac3b 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java @@ -646,85 +646,104 @@ public void testDatabaseNameWithHyphenEndToEnd() throws Exception { FlinkSourceProvider sourceProvider = (FlinkSourceProvider) dataSource.getEventSourceProvider(); - CloseableIterator events = + DataStreamSource source = testEnv.fromSource( - sourceProvider.getSource(), - WatermarkStrategy.noWatermarks(), - PostgresDataSourceFactory.IDENTIFIER, - new EventTypeInfo()) - .executeAndCollect(); + sourceProvider.getSource(), + WatermarkStrategy.noWatermarks(), + PostgresDataSourceFactory.IDENTIFIER, + new EventTypeInfo()); - // Collect events and verify data - List collectedEvents = new ArrayList<>(); - int expectedDataCount = 3; // We inserted 3 rows - int dataCount = 0; - int maxEvents = 10; // Safety limit - - while (events.hasNext() && collectedEvents.size() < maxEvents) { - Event event = events.next(); - collectedEvents.add(event); - if (event instanceof DataChangeEvent) { - dataCount++; - if (dataCount >= expectedDataCount) { - break; + TypeSerializer serializer = + source.getTransformation().getOutputType().createSerializer(testEnv.getConfig()); + 
CheckpointedCollectResultBuffer resultBuffer = + new CheckpointedCollectResultBuffer<>(serializer); + String accumulatorName = "dataStreamCollect_" + UUID.randomUUID(); + CollectResultIterator iterator = + addCollector(testEnv, source, resultBuffer, serializer, accumulatorName); + + JobClient jobClient = testEnv.executeAsync("testDatabaseNameWithHyphen"); + iterator.setJobClient(jobClient); + + try { + // Collect events and verify data + List collectedEvents = new ArrayList<>(); + int expectedDataCount = 3; // We inserted 3 rows + int dataCount = 0; + int maxEvents = 10; // Safety limit + + while (iterator.hasNext() && collectedEvents.size() < maxEvents) { + Event event = iterator.next(); + collectedEvents.add(event); + if (event instanceof DataChangeEvent) { + dataCount++; + if (dataCount >= expectedDataCount) { + break; + } } } - } - events.close(); - // Verify we received CreateTableEvent and DataChangeEvents - assertThat(collectedEvents).isNotEmpty(); + // Verify we received CreateTableEvent and DataChangeEvents + assertThat(collectedEvents).isNotEmpty(); - // Check for CreateTableEvent - long createTableEventCount = - collectedEvents.stream().filter(e -> e instanceof CreateTableEvent).count(); - assertThat(createTableEventCount).isGreaterThanOrEqualTo(1); + // Check for CreateTableEvent + long createTableEventCount = + collectedEvents.stream().filter(e -> e instanceof CreateTableEvent).count(); + assertThat(createTableEventCount).isGreaterThanOrEqualTo(1); - // Check for DataChangeEvents (INSERT events from snapshot) - List dataChangeEvents = - collectedEvents.stream() - .filter(e -> e instanceof DataChangeEvent) - .map(e -> (DataChangeEvent) e) - .collect(Collectors.toList()); + // Check for DataChangeEvents (INSERT events from snapshot) + List dataChangeEvents = + collectedEvents.stream() + .filter(e -> e instanceof DataChangeEvent) + .map(e -> (DataChangeEvent) e) + .collect(Collectors.toList()); - assertThat(dataChangeEvents).hasSize(expectedDataCount); - 
- // Verify the table ID in events - for (DataChangeEvent dce : dataChangeEvents) { - assertThat(dce.tableId().getSchemaName()).isEqualTo("public"); - assertThat(dce.tableId().getTableName()).isEqualTo("test_table"); - } + assertThat(dataChangeEvents).hasSize(expectedDataCount); - // Verify the data content - we should have 3 INSERT events with ids 1, 2, 3 - List actualIds = - dataChangeEvents.stream() - .map( - dce -> { - RecordData after = dce.after(); - return after.getInt(0); // id column - }) - .sorted() - .collect(Collectors.toList()); - assertThat(actualIds).containsExactly(1, 2, 3); + // Verify the table ID in events + for (DataChangeEvent dce : dataChangeEvents) { + assertThat(dce.tableId().getSchemaName()).isEqualTo("public"); + assertThat(dce.tableId().getTableName()).isEqualTo("test_table"); + } - // Cleanup - first drop replication slot, then terminate connections and drop database - try (Connection connection = getJdbcConnection(POSTGRES_CONTAINER); - Statement statement = connection.createStatement()) { - // Drop replication slot first (it was created during CDC connection) + // Verify the data content - we should have 3 INSERT events with ids 1, 2, 3 + List actualIds = + dataChangeEvents.stream() + .map( + dce -> { + RecordData after = dce.after(); + return after.getInt(0); // id column + }) + .sorted() + .collect(Collectors.toList()); + assertThat(actualIds).containsExactly(1, 2, 3); + } finally { + // Cancel the job with a bounded wait so cleanup always runs try { - statement.execute(String.format("SELECT pg_drop_replication_slot('%s')", slotName)); - } catch (SQLException e) { - // Ignore if slot doesn't exist - LOG.warn("Failed to drop replication slot: {}", e.getMessage()); + iterator.close(); + jobClient.cancel().get(); + } catch (Exception e) { + LOG.warn("Failed to cancel job: {}", e.getMessage()); + } + + // Wait for the job to fully stop and release the replication slot + Thread.sleep(3000); + + // Cleanup - drop replication slot, 
terminate connections and drop database + try (Connection connection = getJdbcConnection(POSTGRES_CONTAINER); + Statement statement = connection.createStatement()) { + try { + statement.execute( + String.format("SELECT pg_drop_replication_slot('%s')", slotName)); + } catch (SQLException e) { + LOG.warn("Failed to drop replication slot: {}", e.getMessage()); + } + statement.execute( + "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '" + + hyphenDbName + + "'"); + Thread.sleep(500); + statement.execute("DROP DATABASE IF EXISTS \"" + hyphenDbName + "\""); } - // Terminate all connections to the database - statement.execute( - "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '" - + hyphenDbName - + "'"); - // Small delay to ensure connections are terminated - Thread.sleep(500); - statement.execute("DROP DATABASE IF EXISTS \"" + hyphenDbName + "\""); } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/pom.xml b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/pom.xml index 900703dd4b3..bbaa7862338 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/pom.xml +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/pom.xml @@ -61,12 +61,24 @@ limitations under the License. 
flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/main/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksUtils.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/main/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksUtils.java index d302f297e2d..22237a5f33f 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/main/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksUtils.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/main/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksUtils.java @@ -33,6 +33,7 @@ import org.apache.flink.cdc.common.types.IntType; import org.apache.flink.cdc.common.types.LocalZonedTimestampType; import org.apache.flink.cdc.common.types.SmallIntType; +import org.apache.flink.cdc.common.types.TimeType; import org.apache.flink.cdc.common.types.TimestampType; import org.apache.flink.cdc.common.types.TinyIntType; import org.apache.flink.cdc.common.types.VarCharType; @@ -43,6 +44,8 @@ import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; import java.util.ArrayList; import java.util.List; @@ -132,6 +135,35 @@ public static void toStarRocksDataType( private static final DateTimeFormatter DATETIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + /** Format TIME type data. 
*/ + private static final DateTimeFormatter TIME_FORMATTER = + new DateTimeFormatterBuilder().appendPattern("HH:mm:ss").toFormatter(); + + private static final DateTimeFormatter[] TIME_FORMATTERS = new DateTimeFormatter[10]; + + private static DateTimeFormatter timeFormatter(int precision) { + if (precision <= 0) { + return TIME_FORMATTER; + } + if (precision < TIME_FORMATTERS.length) { + DateTimeFormatter formatter = TIME_FORMATTERS[precision]; + if (formatter == null) { + formatter = + new DateTimeFormatterBuilder() + .appendPattern("HH:mm:ss") + .appendFraction( + ChronoField.NANO_OF_SECOND, precision, precision, true) + .toFormatter(); + TIME_FORMATTERS[precision] = formatter; + } + return formatter; + } + return new DateTimeFormatterBuilder() + .appendPattern("HH:mm:ss") + .appendFraction(ChronoField.NANO_OF_SECOND, precision, precision, true) + .toFormatter(); + } + /** * Creates an accessor for getting elements in an internal RecordData structure at the given * position. @@ -183,6 +215,13 @@ record -> fieldGetter = record -> record.getDate(fieldPos).toLocalDate().format(DATE_FORMATTER); break; + case TIME_WITHOUT_TIME_ZONE: + fieldGetter = + record -> + record.getTime(fieldPos) + .toLocalTime() + .format(timeFormatter(getPrecision(fieldType))); + break; case TIMESTAMP_WITHOUT_TIME_ZONE: fieldGetter = record -> @@ -374,6 +413,21 @@ public StarRocksColumn.Builder visit(DateType dateType) { return builder; } + @Override + public StarRocksColumn.Builder visit(TimeType timeType) { + // StarRocks does not support TIME type, so map it to VARCHAR. + // Format: HH:mm:ss for precision 0, HH:mm:ss.

for precision > 0 + // Maximum length: 8 (HH:mm:ss) + 1 (.) + precision = 8 + 1 + precision + // For precision 0: "HH:mm:ss" = 8 characters + // For precision > 0: "HH:mm:ss." + precision digits + builder.setDataType(VARCHAR); + builder.setNullable(timeType.isNullable()); + int precision = timeType.getPrecision(); + int length = precision > 0 ? 8 + 1 + precision : 8; + builder.setColumnSize(length); + return builder; + } + @Override public StarRocksColumn.Builder visit(TimestampType timestampType) { builder.setDataType(DATETIME); @@ -404,7 +458,8 @@ public static String convertInvalidTimestampDefaultValue( || dataType instanceof org.apache.flink.cdc.common.types.TimestampType || dataType instanceof org.apache.flink.cdc.common.types.ZonedTimestampType) { - if (INVALID_OR_MISSING_DATATIME.equals(defaultValue)) { + if (INVALID_OR_MISSING_DATATIME.equals(defaultValue) + || defaultValue.startsWith(INVALID_OR_MISSING_DATATIME)) { return DEFAULT_DATETIME; } } diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/EventRecordSerializationSchemaTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/EventRecordSerializationSchemaTest.java index 5d830b9352c..a9c6f240e7b 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/EventRecordSerializationSchemaTest.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/EventRecordSerializationSchemaTest.java @@ -28,6 +28,7 @@ import org.apache.flink.cdc.common.data.DateData; import org.apache.flink.cdc.common.data.DecimalData; import org.apache.flink.cdc.common.data.LocalZonedTimestampData; +import 
org.apache.flink.cdc.common.data.TimeData; import org.apache.flink.cdc.common.data.TimestampData; import org.apache.flink.cdc.common.data.binary.BinaryStringData; import org.apache.flink.cdc.common.event.AddColumnEvent; @@ -45,6 +46,7 @@ import org.apache.flink.cdc.common.types.IntType; import org.apache.flink.cdc.common.types.LocalZonedTimestampType; import org.apache.flink.cdc.common.types.SmallIntType; +import org.apache.flink.cdc.common.types.TimeType; import org.apache.flink.cdc.common.types.TimestampType; import org.apache.flink.cdc.common.types.VarCharType; import org.apache.flink.cdc.common.utils.SchemaUtils; @@ -71,6 +73,7 @@ import java.sql.Timestamp; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.LocalTime; import java.time.ZoneId; import java.time.ZoneOffset; import java.util.Arrays; @@ -260,6 +263,227 @@ void testMixedSchemaAndDataChanges() throws Exception { Objects.requireNonNull(serializer.serialize(insertEvent3))); } + @Test + void testTimeTypeSerialization() throws Exception { + TableId tableId = TableId.parse("test.time_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("start_time", new TimeType()) + .physicalColumn( + "end_time", new TimeType(3)) // TIME with millisecond precision + .primaryKey("id") + .build(); + + // Create table + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + Assertions.assertThat(serializer.serialize(createTableEvent)).isNull(); + + BinaryRecordDataGenerator generator = + new BinaryRecordDataGenerator(schema.getColumnDataTypes().toArray(new DataType[0])); + + // Test insert with TIME values + DataChangeEvent insertEvent = + DataChangeEvent.insertEvent( + tableId, + generator.generate( + new Object[] { + 1, + TimeData.fromLocalTime(LocalTime.of(9, 30, 15)), // 09:30:15 + TimeData.fromLocalTime( + LocalTime.of(17, 45, 30, 123000000)) // 17:45:30.123 + })); + + StarRocksRowData result = 
serializer.serialize(insertEvent); + Assertions.assertThat(result).isNotNull(); + + verifySerializeResult( + tableId, + "{\"id\":1,\"start_time\":\"09:30:15\",\"end_time\":\"17:45:30.123\",\"__op\":0}", + result); + } + + @Test + void testTimeTypeZeroSecondsFormat() throws Exception { + TableId tableId = TableId.parse("test.time_zero_seconds_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("zero_time", new TimeType()) + .primaryKey("id") + .build(); + + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + Assertions.assertThat(serializer.serialize(createTableEvent)).isNull(); + + BinaryRecordDataGenerator generator = + new BinaryRecordDataGenerator(schema.getColumnDataTypes().toArray(new DataType[0])); + + DataChangeEvent insertEvent = + DataChangeEvent.insertEvent( + tableId, + generator.generate( + new Object[] {1, TimeData.fromLocalTime(LocalTime.of(16, 0, 0))})); + + StarRocksRowData result = serializer.serialize(insertEvent); + Assertions.assertThat(result).isNotNull(); + + verifySerializeResult(tableId, "{\"id\":1,\"zero_time\":\"16:00:00\",\"__op\":0}", result); + } + + @Test + void testTimeTypeWithSchemaEvolution() throws Exception { + TableId tableId = TableId.parse("test.time_evolution_table"); + Schema initialSchema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("name", new VarCharType(20)) + .primaryKey("id") + .build(); + + // Create initial table + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, initialSchema); + Assertions.assertThat(serializer.serialize(createTableEvent)).isNull(); + + BinaryRecordDataGenerator initialGenerator = + new BinaryRecordDataGenerator( + initialSchema.getColumnDataTypes().toArray(new DataType[0])); + + // Insert initial data + DataChangeEvent initialInsert = + DataChangeEvent.insertEvent( + tableId, + initialGenerator.generate( + new Object[] {1, BinaryStringData.fromString("Initial 
Record")})); + + StarRocksRowData initialResult = serializer.serialize(initialInsert); + Assertions.assertThat(initialResult).isNotNull(); + + verifySerializeResult( + tableId, "{\"id\":1,\"name\":\"Initial Record\",\"__op\":0}", initialResult); + + // Simulate schema evolution: add TIME column + Schema evolvedSchema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("name", new VarCharType(20)) + .physicalColumn("created_time", new TimeType()) + .primaryKey("id") + .build(); + + // Create AddColumnEvent to simulate schema evolution + AddColumnEvent addColumnEvent = + new AddColumnEvent( + tableId, + Arrays.asList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("created_time", new TimeType()), + AddColumnEvent.ColumnPosition.LAST, + null))); + serializer.serialize(addColumnEvent); + + // Insert data with TIME column after schema evolution + BinaryRecordDataGenerator evolvedGenerator = + new BinaryRecordDataGenerator( + evolvedSchema.getColumnDataTypes().toArray(new DataType[0])); + + DataChangeEvent evolvedInsert = + DataChangeEvent.insertEvent( + tableId, + evolvedGenerator.generate( + new Object[] { + 2, + BinaryStringData.fromString("Evolved Record"), + TimeData.fromLocalTime(LocalTime.of(14, 30, 0)) // 14:30:00 + })); + + StarRocksRowData evolvedResult = serializer.serialize(evolvedInsert); + Assertions.assertThat(evolvedResult).isNotNull(); + + verifySerializeResult( + tableId, + "{\"id\":2,\"name\":\"Evolved Record\",\"created_time\":\"14:30:00\",\"__op\":0}", + evolvedResult); + } + + @Test + void testTimeTypeBoundaryValues() throws Exception { + TableId tableId = TableId.parse("test.time_boundary_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("min_time", new TimeType()) + .physicalColumn("max_time", new TimeType()) + .physicalColumn("midnight", new TimeType()) + .primaryKey("id") + .build(); + + CreateTableEvent createTableEvent = new 
CreateTableEvent(tableId, schema); + Assertions.assertThat(serializer.serialize(createTableEvent)).isNull(); + + BinaryRecordDataGenerator generator = + new BinaryRecordDataGenerator(schema.getColumnDataTypes().toArray(new DataType[0])); + + // Test boundary TIME values + DataChangeEvent insertEvent = + DataChangeEvent.insertEvent( + tableId, + generator.generate( + new Object[] { + 1, + TimeData.fromLocalTime(LocalTime.MIN), // 00:00:00 + TimeData.fromLocalTime( + LocalTime + .MAX), // 23:59:59.999 (truncated to millisecond + // precision) + TimeData.fromLocalTime(LocalTime.MIDNIGHT) // 00:00:00 + })); + + StarRocksRowData result = serializer.serialize(insertEvent); + Assertions.assertThat(result).isNotNull(); + + verifySerializeResult( + tableId, + "{\"id\":1,\"min_time\":\"00:00:00\",\"max_time\":\"23:59:59\",\"midnight\":\"00:00:00\",\"__op\":0}", + result); + } + + @Test + void testTimeTypeWithNullValues() throws Exception { + TableId tableId = TableId.parse("test.time_null_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("nullable_time", new TimeType()) + .physicalColumn("not_null_time", new TimeType().notNull()) + .primaryKey("id") + .build(); + + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + Assertions.assertThat(serializer.serialize(createTableEvent)).isNull(); + + BinaryRecordDataGenerator generator = + new BinaryRecordDataGenerator(schema.getColumnDataTypes().toArray(new DataType[0])); + + // Test TIME values with null + DataChangeEvent insertEvent = + DataChangeEvent.insertEvent( + tableId, + generator.generate( + new Object[] { + 1, + null, // Null value for nullable column + TimeData.fromLocalTime( + LocalTime.of(12, 0, 0)) // Not null column + })); + + StarRocksRowData result = serializer.serialize(insertEvent); + Assertions.assertThat(result).isNotNull(); + + verifySerializeResult( + tableId, "{\"id\":1,\"not_null_time\":\"12:00:00\",\"__op\":0}", result); + } + 
private void verifySerializeResult( TableId expectTable, String expectRow, StarRocksRowData actualRowData) throws Exception { diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierITCase.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierITCase.java index e2dc50551da..6501ba03c26 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierITCase.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierITCase.java @@ -213,10 +213,10 @@ void testStarRocksDataType() throws Exception { .column(new PhysicalColumn("string", DataTypes.STRING(), "String")) .column(new PhysicalColumn("decimal", DataTypes.DECIMAL(17, 7), "Decimal")) .column(new PhysicalColumn("date", DataTypes.DATE(), "Date")) - // StarRocks sink doesn't support TIME type yet. 
- // .column(new PhysicalColumn("time", DataTypes.TIME(), "Time")) - // .column(new PhysicalColumn("time_3", DataTypes.TIME(3), "Time With - // Precision")) + .column(new PhysicalColumn("time", DataTypes.TIME(), "Time")) + .column( + new PhysicalColumn( + "time_3", DataTypes.TIME(3), "Time With Precision")) .column(new PhysicalColumn("timestamp", DataTypes.TIMESTAMP(), "Timestamp")) .column( new PhysicalColumn( @@ -256,6 +256,9 @@ void testStarRocksDataType() throws Exception { "string | varchar(1048576) | YES | false | null", "decimal | decimal(17,7) | YES | false | null", "date | date | YES | false | null", + // TIME type mapped to VARCHAR since StarRocks doesn't support TIME type + "time | varchar(8) | YES | false | null", + "time_3 | varchar(12) | YES | false | null", "timestamp | datetime | YES | false | null", "timestamp_3 | datetime | YES | false | null", "timestampltz | datetime | YES | false | null", diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierTest.java index 28d7e940442..2da34d6cf4f 100644 --- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierTest.java +++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-starrocks/src/test/java/org/apache/flink/cdc/connectors/starrocks/sink/StarRocksMetadataApplierTest.java @@ -28,6 +28,7 @@ import org.apache.flink.cdc.common.types.DecimalType; import org.apache.flink.cdc.common.types.IntType; import org.apache.flink.cdc.common.types.SmallIntType; +import org.apache.flink.cdc.common.types.TimeType; import 
org.apache.flink.cdc.common.types.TimestampType; import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; @@ -226,4 +227,150 @@ void testDropColumn() throws Exception { .build(); Assertions.assertThat(actualTable).isEqualTo(expectTable); } + + @Test + void testCreateTableWithTimeType() throws Exception { + TableId tableId = TableId.parse("test.time_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("start_time", new TimeType()) + .physicalColumn( + "end_time", new TimeType(3)) // TIME with millisecond precision + .primaryKey("id") + .build(); + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + metadataApplier.applySchemaChange(createTableEvent); + + StarRocksTable actualTable = + catalog.getTable(tableId.getSchemaName(), tableId.getTableName()).orElse(null); + Assertions.assertThat(actualTable).isNotNull(); + + List columns = new ArrayList<>(); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("id") + .setOrdinalPosition(0) + .setDataType("int") + .setNullable(true) + .build()); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("start_time") + .setOrdinalPosition(1) + .setDataType("varchar") + .setNullable(true) + .setColumnSize(8) + .build()); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("end_time") + .setOrdinalPosition(2) + .setDataType("varchar") + .setNullable(true) + .setColumnSize(12) + .build()); + StarRocksTable expectTable = + new StarRocksTable.Builder() + .setDatabaseName(tableId.getSchemaName()) + .setTableName(tableId.getTableName()) + .setTableType(StarRocksTable.TableType.PRIMARY_KEY) + .setColumns(columns) + .setTableKeys(schema.primaryKeys()) + .setDistributionKeys(schema.primaryKeys()) + .setNumBuckets(10) + .setTableProperties(Collections.singletonMap("replication_num", "5")) + .build(); + Assertions.assertThat(actualTable).isEqualTo(expectTable); + } + + @Test + void testAddTimeTypeColumn() 
throws Exception { + TableId tableId = TableId.parse("test.add_time_column"); + Schema schema = + Schema.newBuilder().physicalColumn("id", new IntType()).primaryKey("id").build(); + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + metadataApplier.applySchemaChange(createTableEvent); + + // Add TIME type column through schema evolution + AddColumnEvent addColumnEvent = + new AddColumnEvent( + tableId, + Arrays.asList( + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("duration", new TimeType())), + new AddColumnEvent.ColumnWithPosition( + Column.physicalColumn("precision_time", new TimeType(3))))); + metadataApplier.applySchemaChange(addColumnEvent); + + StarRocksTable actualTable = + catalog.getTable(tableId.getSchemaName(), tableId.getTableName()).orElse(null); + Assertions.assertThat(actualTable).isNotNull(); + + List columns = new ArrayList<>(); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("id") + .setOrdinalPosition(0) + .setDataType("int") + .setNullable(true) + .build()); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("duration") + .setOrdinalPosition(1) + .setDataType("varchar") + .setNullable(true) + .setColumnSize(8) + .build()); + columns.add( + new StarRocksColumn.Builder() + .setColumnName("precision_time") + .setOrdinalPosition(2) + .setDataType("varchar") + .setNullable(true) + .setColumnSize(12) + .build()); + StarRocksTable expectTable = + new StarRocksTable.Builder() + .setDatabaseName(tableId.getSchemaName()) + .setTableName(tableId.getTableName()) + .setTableType(StarRocksTable.TableType.PRIMARY_KEY) + .setColumns(columns) + .setTableKeys(schema.primaryKeys()) + .setDistributionKeys(schema.primaryKeys()) + .setNumBuckets(10) + .setTableProperties(Collections.singletonMap("replication_num", "5")) + .build(); + Assertions.assertThat(actualTable).isEqualTo(expectTable); + } + + @Test + void testTimeTypeWithDifferentPrecisions() throws Exception { + TableId tableId = 
TableId.parse("test.time_precision_table"); + Schema schema = + Schema.newBuilder() + .physicalColumn("id", new IntType()) + .physicalColumn("time_default", new TimeType()) // Default precision + .physicalColumn("time_0", new TimeType(0)) // Second precision + .physicalColumn("time_3", new TimeType(3)) // Millisecond precision + .physicalColumn("time_max", new TimeType(3)) // Example precision 3 + .primaryKey("id") + .build(); + CreateTableEvent createTableEvent = new CreateTableEvent(tableId, schema); + metadataApplier.applySchemaChange(createTableEvent); + + StarRocksTable actualTable = + catalog.getTable(tableId.getSchemaName(), tableId.getTableName()).orElse(null); + Assertions.assertThat(actualTable).isNotNull(); + + // Verify all TIME columns are correctly mapped to StarRocks VARCHAR type + // since StarRocks doesn't support TIME type + List timeColumns = Arrays.asList("time_default", "time_0", "time_3", "time_max"); + for (StarRocksColumn column : actualTable.getColumns()) { + if (timeColumns.contains(column.getColumnName())) { + Assertions.assertThat(column.getDataType().toLowerCase()).isEqualTo("varchar"); + } + } + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/pom.xml index fb8ba75bdc7..aca5a72ff5a 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/pom.xml @@ -76,12 +76,24 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-db2-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-db2-cdc/pom.xml index 0dc256531a0..0d15f2e9bef 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-db2-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-db2-cdc/pom.xml @@ -88,6 +88,12 @@ limitations under the License. flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -102,6 +108,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-debezium/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-debezium/pom.xml index d1f2394895f..d1137e77fb5 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-debezium/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-debezium/pom.xml @@ -63,6 +63,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + \ No newline at end of file diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/pom.xml index a3df26c36a4..1558ae943b7 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/pom.xml @@ -92,12 +92,24 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/utils/MongoDBContainer.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/utils/MongoDBContainer.java index 487d129f620..00edbb2aaa1 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/utils/MongoDBContainer.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/utils/MongoDBContainer.java @@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory; import org.testcontainers.containers.Network; import org.testcontainers.containers.output.OutputFrame; -import org.testcontainers.containers.wait.strategy.Wait; import java.io.IOException; import java.net.URL; @@ -88,7 +87,6 @@ protected void containerIsStarted(InspectContainerResponse containerInfo, boolea throw new IllegalStateException( "Execute mongo command failed " + execResult.getStderr()); } - this.waitingFor(Wait.forLogMessage("Flink test user created.\\s", 1)); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/pom.xml index 96366a9af91..5ce4f9a527b 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/pom.xml @@ -119,12 +119,24 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java index 1acbeac941b..b49e8f7cb37 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java @@ -224,8 +224,7 @@ else if (!isRemainingTablesCheckpointed private void captureNewlyAddedTables() { // Don't scan newly added table in snapshot mode. if (sourceConfig.isScanNewlyAddedTableEnabled() - && !sourceConfig.getStartupOptions().isSnapshotOnly() - && AssignerStatus.isAssigningFinished(assignerStatus)) { + && !sourceConfig.getStartupOptions().isSnapshotOnly()) { // check whether we got newly added tables try (JdbcConnection jdbc = DebeziumUtils.openJdbcConnection(sourceConfig)) { final List currentCapturedTables = @@ -248,6 +247,10 @@ private void captureNewlyAddedTables() { List newlyAddedTables = currentCapturedTables; // case 1: there are old tables to remove from state + // Table removal must happen regardless of assigner status. When a table + // is excluded after splits have been assigned but before they are finished, + // we must remove those splits to prevent the assigner from waiting indefinitely + // for splits that will never be reported as finished. 
if (!tablesToRemove.isEmpty()) { // remove unassigned tables/splits if it does not satisfy new table filter @@ -269,6 +272,11 @@ private void captureNewlyAddedTables() { alreadyProcessedTables.removeIf(tableId -> tablesToRemove.contains(tableId)); } + // Adding new tables should only happen when assigning is finished. + if (!AssignerStatus.isAssigningFinished(assignerStatus)) { + return; + } + // case 2: there are new tables to add if (!newlyAddedTables.isEmpty()) { LOG.info("Found newly added tables, start capture newly added tables process"); @@ -390,11 +398,12 @@ public List getFinishedSplitInfos() { new ArrayList<>(assignedSplits.values()); List finishedSnapshotSplitInfos = new ArrayList<>(); for (MySqlSchemalessSnapshotSplit split : assignedSnapshotSplit) { - BinlogOffset binlogOffset = splitFinishedOffsets.get(split.splitId()); + String splitId = split.splitId(); + BinlogOffset binlogOffset = splitFinishedOffsets.get(splitId); finishedSnapshotSplitInfos.add( new FinishedSnapshotSplitInfo( split.getTableId(), - split.splitId(), + splitId, split.getSplitStart(), split.getSplitEnd(), binlogOffset)); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/java/org/apache/flink/cdc/connectors/mysql/source/TableExclusionDuringSnapshotIT.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/java/org/apache/flink/cdc/connectors/mysql/source/TableExclusionDuringSnapshotIT.java new file mode 100644 index 00000000000..2a305871408 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/java/org/apache/flink/cdc/connectors/mysql/source/TableExclusionDuringSnapshotIT.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.mysql.source; + +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.cdc.connectors.mysql.debezium.DebeziumUtils; +import org.apache.flink.cdc.connectors.mysql.source.split.MySqlSnapshotSplit; +import org.apache.flink.cdc.connectors.mysql.source.utils.hooks.SnapshotPhaseHooks; +import org.apache.flink.cdc.connectors.mysql.testutils.UniqueDatabase; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.StateRecoveryOptions; +import org.apache.flink.core.execution.CheckpointingMode; +import org.apache.flink.core.execution.JobClient; +import org.apache.flink.core.execution.SavepointFormatType; +import org.apache.flink.runtime.checkpoint.CheckpointException; +import org.apache.flink.runtime.dispatcher.UnavailableDispatcherOperationException; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.RowRowConverter; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.DataType; +import 
org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.util.CloseableIterator; +import org.apache.flink.util.ExceptionUtils; + +import io.debezium.connector.mysql.MySqlConnection; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.SQLException; +import java.time.ZoneId; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; + +/** + * IT test for FLINK-38334: MySQL CDC source gets stuck in INITIAL_ASSIGNING state when a table is + * excluded from configuration after splits have been assigned but before they are finished. + * + *

NOTE: The test uses a JUnit timeout as a CI safeguard. The test logic is deterministic: it + * either receives a binlog record (proving streaming mode) or blocks forever (bug exists). + * + *

When the bug exists, the test will timeout because: + * + *

    + *
  1. The savepoint captures table "a" splits in assignedSplits (assigned but not finished) + *
  2. After restart with table "a" excluded, the reader skips table "a" splits + *
  3. The enumerator waits for table "a" splits to be reported as finished (they never will be) + *
  4. allSnapshotSplitsFinished() never returns true → job stuck in INITIAL_ASSIGNING + *
+ * + *

When the fix is applied, the test passes because excluded table splits are cleaned up on + * restore, allowing the job to transition to streaming mode. + */ +public class TableExclusionDuringSnapshotIT extends MySqlSourceTestBase { + private static final UniqueDatabase DATABASE = + new UniqueDatabase(MYSQL_CONTAINER, "table_exclusion_snapshot", "mysqluser", "mysqlpw"); + private static final DataType DATA_TYPE = DataTypes.ROW(DataTypes.FIELD("id", DataTypes.INT())); + private static final RowRowConverter ROW_CONVERTER = RowRowConverter.create(DATA_TYPE); + private static final RowDataDebeziumDeserializeSchema DESERIALIZER = + RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType((RowType) DATA_TYPE.getLogicalType()) + .setResultTypeInfo( + InternalTypeInfo.of(TypeConversions.fromDataToLogicalType(DATA_TYPE))) + .build(); + + @BeforeEach + void setUp() { + DATABASE.createAndInitialize(); + } + + // Latches for coordinating between test thread and snapshot hook + // These are static because the hook is serialized and deserialized + private static volatile CountDownLatch hookTriggeredLatch; + private static volatile CountDownLatch savepointTakenLatch; + + /** + * Tests that excluding a table from configuration during INITIAL_ASSIGNING phase doesn't cause + * the source to get stuck. + * + *

Scenario: + * + *

    + *
  1. Start job capturing tables "a" and "b" with a blocking hook on table "a" + *
  2. Take savepoint while table "a" is being snapshotted (splits assigned but not finished) + *
  3. Restart with configuration excluding table "a" + *
  4. Insert a new record into table "b" + *
  5. Verify we receive the new record (proves job transitioned to streaming) + *
+ * + *

If the bug exists, the job will be stuck in INITIAL_ASSIGNING because the enumerator waits + * for table "a" splits to be reported as finished, but the reader skips them. + */ + @Test + @Timeout(120) + void testTableExclusionDuringInitialAssigning(@TempDir Path tempDir) throws Exception { + final String savepointDirectory = tempDir.toUri().toString(); + + executeSql("INSERT INTO a VALUES (1)"); + executeSql("INSERT INTO b VALUES (1)"); + executeSql("INSERT INTO b VALUES (2)"); + + // Phase 1: Take savepoint while table "a" splits are assigned but not finished + String savepointPath = + runJobAndSavepointDuringInitialAssigning(savepointDirectory, "a", "b"); + + // Phase 2: Restart with only table "b", verify streaming mode works + String binlogRecord = runJobFromSavepointAndVerifyStreaming(savepointPath, "b"); + Assertions.assertThat(binlogRecord).isEqualTo("+I[200]"); + } + + /** Starts a job and takes a savepoint while splits are assigned but not finished. */ + private String runJobAndSavepointDuringInitialAssigning( + String savepointDirectory, String... tableNames) throws Exception { + hookTriggeredLatch = new CountDownLatch(1); + savepointTakenLatch = new CountDownLatch(1); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(1); + env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE); + + MySqlSource source = createSourceWithBlockingHook(tableNames); + DataStreamSource stream = + env.fromSource(source, WatermarkStrategy.noWatermarks(), "MySQL CDC Source"); + stream.print(); + + JobClient jobClient = env.executeAsync("Snapshot phase"); + + hookTriggeredLatch.await(); + String savepointPath = triggerSavepointWithRetry(jobClient, savepointDirectory); + savepointTakenLatch.countDown(); + + jobClient.cancel().get(); + return savepointPath; + } + + /** Restarts job from savepoint and verifies it transitions to streaming mode. 
*/ + private String runJobFromSavepointAndVerifyStreaming(String savepointPath, String... tableNames) + throws Exception { + Configuration configuration = new Configuration(); + configuration.set(StateRecoveryOptions.SAVEPOINT_PATH, savepointPath); + + StreamExecutionEnvironment env = + StreamExecutionEnvironment.getExecutionEnvironment(configuration); + env.setParallelism(1); + env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE); + + MySqlSource source = createSourceBuilder(tableNames).build(); + DataStreamSource stream = + env.fromSource(source, WatermarkStrategy.noWatermarks(), "MySQL CDC Source"); + + try (CloseableIterator iterator = stream.executeAndCollect()) { + // Consume snapshot records from table "b" (2 rows) + for (int i = 0; i < 2; i++) { + iterator.next(); + } + + // Insert a new record - if job is in streaming mode, we'll receive it + executeSql("INSERT INTO b VALUES (200)"); + + // This blocks forever if the job is stuck in INITIAL_ASSIGNING + return ROW_CONVERTER.toExternal(iterator.next()).toString(); + } + } + + private MySqlSourceBuilder createSourceBuilder(String... tableNames) { + return MySqlSource.builder() + .hostname(MYSQL_CONTAINER.getHost()) + .port(MYSQL_CONTAINER.getDatabasePort()) + .databaseList(DATABASE.getDatabaseName()) + .serverTimeZone("UTC") + .tableList( + Arrays.stream(tableNames) + .map(t -> DATABASE.getDatabaseName() + "." + t) + .toArray(String[]::new)) + .username(DATABASE.getUsername()) + .password(DATABASE.getPassword()) + .deserializer(DESERIALIZER) + .scanNewlyAddedTableEnabled(true); + } + + /** Creates a source with a hook that blocks on table "a" until the savepoint is taken. */ + private MySqlSource createSourceWithBlockingHook(String... 
tableNames) { + MySqlSource source = createSourceBuilder(tableNames).build(); + + SnapshotPhaseHooks hooks = new SnapshotPhaseHooks(); + hooks.setPostLowWatermarkAction( + (connection, split) -> { + MySqlSnapshotSplit snapshotSplit = (MySqlSnapshotSplit) split; + if (!snapshotSplit.getTableId().table().equals("a")) { + return; + } + hookTriggeredLatch.countDown(); + try { + savepointTakenLatch.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + }); + source.setSnapshotHooks(hooks); + + return source; + } + + private String triggerSavepointWithRetry(JobClient jobClient, String savepointDirectory) + throws Exception { + int retryTimes = 0; + while (retryTimes < 600) { + try { + return jobClient + .triggerSavepoint(savepointDirectory, SavepointFormatType.DEFAULT) + .get(); + } catch (Exception e) { + // Retry if checkpoint triggering task is not yet ready + Optional checkpointException = + ExceptionUtils.findThrowable(e, CheckpointException.class); + if (checkpointException.isPresent() + && checkpointException + .get() + .getMessage() + .contains("Checkpoint triggering task")) { + Thread.sleep(100); + retryTimes++; + continue; + } + // Retry if job is still initializing + Optional dispatcherException = + ExceptionUtils.findThrowable( + e, UnavailableDispatcherOperationException.class); + if (dispatcherException.isPresent()) { + Thread.sleep(100); + retryTimes++; + continue; + } + throw e; + } + } + throw new RuntimeException("Failed to trigger savepoint after " + retryTimes + " retries"); + } + + private void executeSql(String... 
statements) throws SQLException { + Map properties = new HashMap<>(); + properties.put("database.hostname", MYSQL_CONTAINER.getHost()); + properties.put("database.port", String.valueOf(MYSQL_CONTAINER.getDatabasePort())); + properties.put("database.user", DATABASE.getUsername()); + properties.put("database.password", DATABASE.getPassword()); + properties.put("database.serverTimezone", ZoneId.of("UTC").toString()); + io.debezium.config.Configuration configuration = + io.debezium.config.Configuration.from(properties); + + try (MySqlConnection connection = + DebeziumUtils.createMySqlConnection(configuration, new Properties())) { + connection.execute("USE " + DATABASE.getDatabaseName()); + connection.execute(statements); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/resources/ddl/table_exclusion_snapshot.sql b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/resources/ddl/table_exclusion_snapshot.sql new file mode 100644 index 00000000000..cf4062dc3ef --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/test/resources/ddl/table_exclusion_snapshot.sql @@ -0,0 +1,26 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. + +-- DATABASE: table_exclusion_snapshot +CREATE TABLE a ( + id INT NOT NULL, + PRIMARY KEY (id) +); + +CREATE TABLE b ( + id INT NOT NULL, + PRIMARY KEY (id) +); + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-oracle-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-oracle-cdc/pom.xml index 4e55ab47c88..28b5daa2c85 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-oracle-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-oracle-cdc/pom.xml @@ -107,6 +107,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -175,6 +181,12 @@ limitations under the License. flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/pom.xml index b5a1a29a67e..ec73c783ac9 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/pom.xml @@ -107,6 +107,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -170,6 +176,12 @@ limitations under the License. 
flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/fetch/PostgresScanFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/fetch/PostgresScanFetchTask.java index f31db75bd83..1915aca809c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/fetch/PostgresScanFetchTask.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/fetch/PostgresScanFetchTask.java @@ -287,12 +287,17 @@ private void createDataEventsForTable( .filter(field -> table.columnWithName(field).typeName().equals("uuid")) .collect(Collectors.toList()); + List columnNames = + table.columns().stream() + .map(column -> jdbcConnection.quotedColumnIdString(column.name())) + .collect(Collectors.toList()); final String selectSql = PostgresQueryUtils.buildSplitScanQuery( snapshotSplit.getTableId(), snapshotSplit.getSplitKeyType(), snapshotSplit.getSplitStart() == null, snapshotSplit.getSplitEnd() == null, + columnNames, uuidFields); LOG.debug( "For split '{}' of table {} using select statement: '{}'", diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/utils/PostgresQueryUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/utils/PostgresQueryUtils.java index 9c0d558474c..348344bab14 100644 --- 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/utils/PostgresQueryUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-postgres-cdc/src/main/java/org/apache/flink/cdc/connectors/postgres/source/utils/PostgresQueryUtils.java @@ -31,7 +31,6 @@ import java.util.Iterator; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; import static org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.rowToArray; @@ -156,17 +155,16 @@ public static String buildSplitScanQuery( boolean isFirstSplit, boolean isLastSplit, List uuidFields) { - return buildSplitQuery(tableId, pkRowType, isFirstSplit, isLastSplit, uuidFields, -1, true); + return buildSplitScanQuery(tableId, pkRowType, isFirstSplit, isLastSplit, null, uuidFields); } - private static String buildSplitQuery( + public static String buildSplitScanQuery( TableId tableId, RowType pkRowType, boolean isFirstSplit, boolean isLastSplit, - List uuidFields, - int limitSize, - boolean isScanningData) { + List columnNames, + List uuidFields) { final String condition; if (isFirstSplit && isLastSplit) { @@ -174,11 +172,9 @@ private static String buildSplitQuery( } else if (isFirstSplit) { final StringBuilder sql = new StringBuilder(); addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ", uuidFields); - if (isScanningData) { - sql.append(" AND NOT ("); - addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ", uuidFields); - sql.append(")"); - } + sql.append(" AND NOT ("); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ", uuidFields); + sql.append(")"); condition = sql.toString(); } else if (isLastSplit) { final StringBuilder sql = new StringBuilder(); @@ -187,30 +183,19 @@ private static String buildSplitQuery( } else { final StringBuilder sql = new StringBuilder(); addPrimaryKeyColumnsToCondition(pkRowType, sql, " >= ", uuidFields); - if (isScanningData) { - sql.append(" 
AND NOT ("); - addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ", uuidFields); - sql.append(")"); - } + sql.append(" AND NOT ("); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ", uuidFields); + sql.append(")"); sql.append(" AND "); addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ", uuidFields); condition = sql.toString(); } - if (isScanningData) { - return buildSelectWithRowLimits( - tableId, limitSize, "*", Optional.ofNullable(condition), Optional.empty()); - } else { - final String orderBy = - pkRowType.getFieldNames().stream().collect(Collectors.joining(", ")); - return buildSelectWithBoundaryRowLimits( - tableId, - limitSize, - getPrimaryKeyColumnsProjection(pkRowType), - getMaxPrimaryKeyColumnsProjection(pkRowType), - Optional.ofNullable(condition), - orderBy); - } + return buildSelectWithRowLimits( + tableId, + columnNames == null ? "*" : String.join(",", columnNames), + Optional.ofNullable(condition), + Optional.empty()); } public static PreparedStatement readTableSplitDataStatement( @@ -330,7 +315,6 @@ private static String getMaxPrimaryKeyColumnsProjection(RowType pkRowType) { private static String buildSelectWithRowLimits( TableId tableId, - int limit, String projection, Optional condition, Optional orderBy) { @@ -343,9 +327,6 @@ private static String buildSelectWithRowLimits( if (orderBy.isPresent()) { sql.append(" ORDER BY ").append(orderBy.get()); } - if (limit > 0) { - sql.append(" LIMIT ").append(limit); - } return sql.toString(); } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-sqlserver-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-sqlserver-cdc/pom.xml index f8413b89bcd..08dab404bfd 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-sqlserver-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-sqlserver-cdc/pom.xml @@ -88,6 +88,12 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -150,6 +156,12 @@ limitations under the License. flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-test-util/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-test-util/pom.xml index 22e76aa4fde..97d66adcb1c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-test-util/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-test-util/pom.xml @@ -46,6 +46,12 @@ limitations under the License. org.apache.flink flink-test-utils ${flink.version} + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml index cd09cde5354..6d87970920e 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml @@ -75,6 +75,12 @@ limitations under the License. flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + @@ -82,6 +88,12 @@ limitations under the License. flink-connector-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-vitess-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-vitess-cdc/pom.xml index 39354246ed8..53a6006bf3f 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-vitess-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-vitess-cdc/pom.xml @@ -102,6 +102,12 @@ limitations under the License. 
flink-test-utils ${flink.version} test + + + org.testcontainers + testcontainers + + diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/pom.xml b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/pom.xml index a083ce5619b..8c263f8de86 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/pom.xml +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/pom.xml @@ -42,7 +42,7 @@ limitations under the License. 1.6.8 1.10.1 2.3.9 - 0.7.0 + 0.9.0-incubating 1.37 1.1.0 @@ -673,10 +673,19 @@ limitations under the License. - com.alibaba.fluss - fluss-flink-${flink.major.version} + org.apache.fluss + fluss-flink-${flink-major-1.20} ${fluss.version} - fluss-sql-connector.jar + fluss-flink-${flink-1.20}.jar + jar + ${project.build.directory}/dependencies + + + + org.apache.fluss + fluss-flink-${flink-major-1.19} + ${fluss.version} + fluss-flink-${flink-1.19}.jar jar ${project.build.directory}/dependencies diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/FlussE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/FlussE2eITCase.java index c20808fa3c8..8f12e20c151 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/FlussE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/java/org/apache/flink/cdc/pipeline/tests/FlussE2eITCase.java @@ -53,7 +53,7 @@ public class FlussE2eITCase extends PipelineTestEnvironment { private static final Logger LOG = LoggerFactory.getLogger(FlussE2eITCase.class); private static final Duration FLUSS_TESTCASE_TIMEOUT = Duration.ofMinutes(3); - private static final String flussImageTag = "fluss/fluss:0.7.0"; + private static final String flussImageTag = "apache/fluss:0.9.0-incubating"; private static final String zooKeeperImageTag = "zookeeper:3.9.2"; private static final List flussCoordinatorProperties = @@ -64,7 +64,7 @@ public 
class FlussE2eITCase extends PipelineTestEnvironment { "remote.data.dir: /tmp/fluss/remote-data", "security.protocol.map: CLIENT:SASL, INTERNAL:PLAINTEXT", "security.sasl.enabled.mechanisms: PLAIN", - "security.sasl.plain.jaas.config: com.alibaba.fluss.security.auth.sasl.plain.PlainLoginModule required user_admin=\"admin-pass\" user_developer=\"developer-pass\";", + "security.sasl.plain.jaas.config: org.apache.fluss.security.auth.sasl.plain.PlainLoginModule required user_admin=\"admin-pass\" user_developer=\"developer-pass\";", "super.users: User:admin"); private static final List flussTabletServerProperties = @@ -78,7 +78,7 @@ public class FlussE2eITCase extends PipelineTestEnvironment { "remote.data.dir: /tmp/fluss/remote-data", "security.protocol.map: CLIENT:SASL, INTERNAL:PLAINTEXT", "security.sasl.enabled.mechanisms: PLAIN", - "security.sasl.plain.jaas.config: com.alibaba.fluss.security.auth.sasl.plain.PlainLoginModule required user_admin=\"admin-pass\" user_developer=\"developer-pass\";", + "security.sasl.plain.jaas.config: org.apache.fluss.security.auth.sasl.plain.PlainLoginModule required user_admin=\"admin-pass\" user_developer=\"developer-pass\";", "super.users: User:admin"); @Container @@ -129,7 +129,7 @@ protected List copyJarToFlinkLib() { // Due to a bug described in https://github.com/apache/fluss/pull/1267, it's not viable to // pass Fluss dependency with `--jar` CLI option. We may remove this workaround and use // `submitPipelineJob` to carry extra jar later. 
- return Collections.singletonList("fluss-sql-connector.jar"); + return Collections.singletonList(String.format("fluss-flink-%s.jar", flinkVersion)); } @BeforeEach diff --git a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/resources/rules/unexpected.yaml b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/resources/rules/unexpected.yaml index 293f76cd455..ff7339a4c51 100644 --- a/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/resources/rules/unexpected.yaml +++ b/flink-cdc-e2e-tests/flink-cdc-pipeline-e2e-tests/src/test/resources/rules/unexpected.yaml @@ -79,5 +79,5 @@ steps: language: clojure error: | Unexpected key `language` in YAML UDF block. - Allowed keys in this context are: [name, classpath] + Allowed keys in this context are: [name, classpath, options] Note: option language: "clojure" is unexpected. It was silently ignored in previous versions, and probably should be removed. diff --git a/flink-cdc-pipeline-model/pom.xml b/flink-cdc-pipeline-model/pom.xml index cf51c1cc387..e7dba7f6f66 100644 --- a/flink-cdc-pipeline-model/pom.xml +++ b/flink-cdc-pipeline-model/pom.xml @@ -42,6 +42,12 @@ limitations under the License. flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + + dev.langchain4j diff --git a/flink-cdc-pipeline-udf-examples/src/main/java/org/apache/flink/cdc/udf/examples/java/ConfigurableFunctionClass.java b/flink-cdc-pipeline-udf-examples/src/main/java/org/apache/flink/cdc/udf/examples/java/ConfigurableFunctionClass.java new file mode 100644 index 00000000000..1ce7309b726 --- /dev/null +++ b/flink-cdc-pipeline-udf-examples/src/main/java/org/apache/flink/cdc/udf/examples/java/ConfigurableFunctionClass.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.udf.examples.java; + +import org.apache.flink.cdc.common.configuration.ConfigOption; +import org.apache.flink.cdc.common.configuration.ConfigOptions; +import org.apache.flink.cdc.common.udf.UserDefinedFunction; +import org.apache.flink.cdc.common.udf.UserDefinedFunctionContext; + +/** This is an example UDF class that reads options from configuration. */ +public class ConfigurableFunctionClass implements UserDefinedFunction { + + private static final ConfigOption GREETING = + ConfigOptions.key("greeting").stringType().defaultValue("Hello"); + + private static final ConfigOption SUFFIX = + ConfigOptions.key("suffix").stringType().defaultValue("!"); + + private String greeting; + private String suffix; + + public String eval(String value) { + return greeting + " " + value + suffix; + } + + @Override + public void open(UserDefinedFunctionContext context) throws Exception { + greeting = context.configuration().get(GREETING); + suffix = context.configuration().get(SUFFIX); + } + + @Override + public void close() throws Exception {} +} diff --git a/flink-cdc-pipeline-udf-examples/src/main/scala/org/apache/flink/cdc/udf/examples/scala/ConfigurableFunctionClass.scala b/flink-cdc-pipeline-udf-examples/src/main/scala/org/apache/flink/cdc/udf/examples/scala/ConfigurableFunctionClass.scala new file mode 100644 index 00000000000..6405ef715ff --- /dev/null +++ 
b/flink-cdc-pipeline-udf-examples/src/main/scala/org/apache/flink/cdc/udf/examples/scala/ConfigurableFunctionClass.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.udf.examples.scala + +import org.apache.flink.cdc.common.configuration.ConfigOptions +import org.apache.flink.cdc.common.udf.{UserDefinedFunction, UserDefinedFunctionContext} + +/** This is an example UDF class that reads options from configuration. */ +class ConfigurableFunctionClass extends UserDefinedFunction { + + private var greeting: String = "Hello" + private var suffix: String = "!" 
+ + def eval(value: String): String = { + greeting + " " + value + suffix + } + + override def open(context: UserDefinedFunctionContext): Unit = { + greeting = context.configuration().get(ConfigurableFunctionClass.GREETING) + suffix = context.configuration().get(ConfigurableFunctionClass.SUFFIX) + } + + override def close(): Unit = {} +} + +object ConfigurableFunctionClass { + private val GREETING = ConfigOptions.key("greeting").stringType().defaultValue("Hello") + private val SUFFIX = ConfigOptions.key("suffix").stringType().defaultValue("!") +} diff --git a/flink-cdc-runtime/pom.xml b/flink-cdc-runtime/pom.xml index 6b1ea91a9dd..bb1c78fbba5 100644 --- a/flink-cdc-runtime/pom.xml +++ b/flink-cdc-runtime/pom.xml @@ -68,6 +68,12 @@ limitations under the License. flink-test-utils-junit ${flink.version} test + + + org.testcontainers + testcontainers + + org.apache.flink diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java index bd3379dd8a6..f977089b47a 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/schema/common/SchemaManagerTest.java @@ -27,8 +27,9 @@ import org.apache.flink.cdc.common.schema.Schema; import org.apache.flink.cdc.common.types.DataTypes; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.List; diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java index 
e50ed0ca7f8..22e0cfbd342 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PostTransformOperatorTest.java @@ -36,11 +36,12 @@ import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + import org.apache.calcite.runtime.CalciteContextException; import org.apache.calcite.sql.validate.SqlValidatorException; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import java.math.BigDecimal; diff --git a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java index a6b6717334d..0425751d79a 100644 --- a/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java +++ b/flink-cdc-runtime/src/test/java/org/apache/flink/cdc/runtime/operators/transform/PreTransformOperatorTest.java @@ -32,9 +32,10 @@ import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; + import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import java.util.Collections; diff --git a/pom.xml b/pom.xml index 61d657a91ce..3d87b69054f 100644 --- a/pom.xml +++ b/pom.xml @@ -73,7 +73,7 @@ limitations under the License. 1.9.8.Final 3.2.0 2.2.0 - 1.18.3 + 1.21.4 1.3 4.2.0 1.7.36