-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[FLINK-36165][source-connector/mysql] Support capturing snapshot data with conditions #3776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 10 commits
8a8d65c
f1b8776
ec83fbe
96b885a
36ae871
e59f5e8
8ee4c92
5f913f4
e50667d
e6dd29e
ed9ddcc
794513b
3af4044
ca12a3f
c025e29
3d360c9
67fae6d
b7f13fd
72724f1
8ae2003
2a0bdf2
85647b2
cc477d5
9e8b5f5
0d14a1b
2d8a8ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,7 @@ | |
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_NEWLY_ADDED_TABLE_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FILTERS; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_MODE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_FILE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET; | ||
|
|
@@ -113,6 +114,8 @@ public class MySqlDataSourceFactory implements DataSourceFactory { | |
| private static final Logger LOG = LoggerFactory.getLogger(MySqlDataSourceFactory.class); | ||
|
|
||
| public static final String IDENTIFIER = "mysql"; | ||
| public static final String SNAPSHOT_FILTER_TABLE_KEY = "table"; | ||
| public static final String SNAPSHOT_FILTER_FILTER_KEY = "filter"; | ||
|
|
||
| @Override | ||
| public DataSource createDataSource(Context context) { | ||
|
|
@@ -285,6 +288,19 @@ public DataSource createDataSource(Context context) { | |
| LOG.info("Add chunkKeyColumn {}.", chunkKeyColumnMap); | ||
| configFactory.chunkKeyColumn(chunkKeyColumnMap); | ||
| } | ||
|
|
||
| List<Map<String, String>> snapshotFilters = config.get(SCAN_SNAPSHOT_FILTERS); | ||
| if (snapshotFilters != null && !snapshotFilters.isEmpty()) { | ||
| Map<String, String> snapshotFiltersMap = | ||
| snapshotFilters.stream() | ||
| .collect( | ||
| Collectors.toMap( | ||
| it -> it.get(SNAPSHOT_FILTER_TABLE_KEY), | ||
| it -> it.get(SNAPSHOT_FILTER_FILTER_KEY))); | ||
| LOG.info("Add snapshotFilters {}.", snapshotFiltersMap); | ||
| configFactory.snapshotFilters(snapshotFiltersMap); | ||
| } | ||
|
Comment on lines +292 to +298
|
||
|
|
||
| String metadataList = config.get(METADATA_LIST); | ||
| List<MySqlReadableMetadata> readableMetadataList = listReadableMetadata(metadataList); | ||
| return new MySqlDataSource(configFactory, readableMetadataList); | ||
|
|
@@ -359,6 +375,7 @@ public Set<ConfigOption<?>> optionalOptions() { | |
| options.add(PARSE_ONLINE_SCHEMA_CHANGES); | ||
| options.add(SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED); | ||
| options.add(SCAN_INCREMENTAL_SNAPSHOT_BACKFILL_SKIP); | ||
| options.add(SCAN_SNAPSHOT_FILTERS); | ||
| return options; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| import java.sql.Connection; | ||
| import java.sql.SQLException; | ||
| import java.sql.Statement; | ||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
|
|
@@ -43,6 +44,7 @@ | |
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_BINLOG_NEWLY_ADDED_TABLE_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FILTERS; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TABLES; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TABLES_EXCLUDE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TREAT_TINYINT1_AS_BOOLEAN_ENABLED; | ||
|
|
@@ -330,6 +332,58 @@ void testAddChunkKeyColumns() { | |
| }); | ||
| } | ||
|
|
||
| @Test | ||
| void testAddSnapshotFilters() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need more tests for this feature (IT and E2E tests).
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added two tests in commit 85647b2.
|
||
| inventoryDatabase.createAndInitialize(); | ||
| Map<String, Object> options = new HashMap<>(); | ||
| options.put(HOSTNAME.key(), MYSQL_CONTAINER.getHost()); | ||
| options.put(PORT.key(), String.valueOf(MYSQL_CONTAINER.getDatabasePort())); | ||
| options.put(USERNAME.key(), TEST_USER); | ||
| options.put(PASSWORD.key(), TEST_PASSWORD); | ||
| options.put(TABLES.key(), inventoryDatabase.getDatabaseName() + ".\\.*"); | ||
| options.put(SCAN_SNAPSHOT_FILTERS.key(), mockSnapshotFilters()); | ||
| Factory.Context context = new MockContext(Configuration.fromMap(options)); | ||
|
|
||
| MySqlDataSourceFactory factory = new MySqlDataSourceFactory(); | ||
| MySqlDataSource dataSource = (MySqlDataSource) factory.createDataSource(context); | ||
|
|
||
| assertThat(dataSource.getSourceConfig().getSnapshotFilters()) | ||
| .isNotEmpty() | ||
| .isEqualTo( | ||
| new HashMap<String, String>() { | ||
| { | ||
| put( | ||
| inventoryDatabase.getDatabaseName() + ".multi_max_\\.*", | ||
| "id > 200"); | ||
| put(inventoryDatabase.getDatabaseName() + ".products", "1 = 0"); | ||
| put( | ||
| inventoryDatabase.getDatabaseName() + ".customers", | ||
| "city != 'China:beijing'"); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| private List<Map<String, String>> mockSnapshotFilters() { | ||
| List<Map<String, String>> snapshotFilters = new ArrayList<>(); | ||
|
|
||
| Map<String, String> filter1 = new HashMap<>(); | ||
| filter1.put("table", inventoryDatabase.getDatabaseName() + ".multi_max_\\.*"); | ||
| filter1.put("filter", "id > 200"); | ||
| snapshotFilters.add(filter1); | ||
|
|
||
| Map<String, String> filter2 = new HashMap<>(); | ||
| filter2.put("table", inventoryDatabase.getDatabaseName() + ".products"); | ||
| filter2.put("filter", "1 = 0"); | ||
| snapshotFilters.add(filter2); | ||
|
|
||
| Map<String, String> filter3 = new HashMap<>(); | ||
| filter3.put("table", inventoryDatabase.getDatabaseName() + ".customers"); | ||
| filter3.put("filter", "city != 'China:beijing'"); | ||
| snapshotFilters.add(filter3); | ||
|
|
||
| return snapshotFilters; | ||
| } | ||
|
|
||
| class MockContext implements Factory.Context { | ||
|
|
||
| Configuration factoryConfiguration; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to add a usage example for this feature.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added usage examples in commit cc477d5:
- "… Filters" section with a YAML pipeline example demonstrating the `scan.snapshot.filters` list syntax.
- "… Filter" section with both SQL DDL and DataStream API examples showing the `scan.snapshot.filter` option usage.