-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[FLINK-36165][source-connector/mysql] Support capturing snapshot data with conditions #3776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
8a8d65c
f1b8776
ec83fbe
96b885a
36ae871
e59f5e8
8ee4c92
5f913f4
e50667d
e6dd29e
ed9ddcc
794513b
3af4044
ca12a3f
c025e29
3d360c9
67fae6d
b7f13fd
72724f1
8ae2003
2a0bdf2
85647b2
cc477d5
9e8b5f5
0d14a1b
2d8a8ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,7 @@ | |
| import java.util.Arrays; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.LinkedHashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
|
|
@@ -83,6 +84,7 @@ | |
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_NEWLY_ADDED_TABLE_ENABLED; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FILTERS; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_MODE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_FILE; | ||
| import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET; | ||
|
|
@@ -112,6 +114,8 @@ public class MySqlDataSourceFactory implements DataSourceFactory { | |
| private static final Logger LOG = LoggerFactory.getLogger(MySqlDataSourceFactory.class); | ||
|
|
||
| public static final String IDENTIFIER = "mysql"; | ||
| public static final String SNAPSHOT_FILTER_TABLE_KEY = "table"; | ||
| public static final String SNAPSHOT_FILTER_FILTER_KEY = "filter"; | ||
|
|
||
| @Override | ||
| public DataSource createDataSource(Context context) { | ||
|
|
@@ -284,11 +288,78 @@ public DataSource createDataSource(Context context) { | |
| LOG.info("Add chunkKeyColumn {}.", chunkKeyColumnMap); | ||
| configFactory.chunkKeyColumn(chunkKeyColumnMap); | ||
| } | ||
|
|
||
| List<Map<String, String>> snapshotFilters = config.get(SCAN_SNAPSHOT_FILTERS); | ||
| if (snapshotFilters != null && !snapshotFilters.isEmpty()) { | ||
| Map<String, String> snapshotFiltersMap = | ||
| parseAndValidateSnapshotFilters(snapshotFilters); | ||
| LOG.info("Add snapshotFilters {}.", snapshotFiltersMap); | ||
| configFactory.snapshotFilters(snapshotFiltersMap); | ||
| } | ||
|
Comment on lines
+292
to
+298
|
||
|
|
||
| String metadataList = config.get(METADATA_LIST); | ||
| List<MySqlReadableMetadata> readableMetadataList = listReadableMetadata(metadataList); | ||
| return new MySqlDataSource(configFactory, readableMetadataList); | ||
| } | ||
|
|
||
| /** | ||
| * Parses and validates snapshot filters configuration. | ||
| * | ||
| * @param snapshotFilters List of filter entries, each containing 'table' and 'filter' keys | ||
| * @return LinkedHashMap preserving insertion order, mapping table patterns to filter | ||
| * expressions | ||
| * @throws ValidationException If any entry is missing required keys or contains duplicate table | ||
| * patterns | ||
| */ | ||
| private Map<String, String> parseAndValidateSnapshotFilters( | ||
| List<Map<String, String>> snapshotFilters) { | ||
| Map<String, String> result = new LinkedHashMap<>(); | ||
|
|
||
| for (int i = 0; i < snapshotFilters.size(); i++) { | ||
| Map<String, String> entry = snapshotFilters.get(i); | ||
|
|
||
| // Validate required keys | ||
| String table = entry.get(SNAPSHOT_FILTER_TABLE_KEY); | ||
| String filter = entry.get(SNAPSHOT_FILTER_FILTER_KEY); | ||
|
|
||
| if (table == null || table.trim().isEmpty()) { | ||
| throw new ValidationException( | ||
| String.format( | ||
| "Snapshot filter entry at index %d is missing required key '%s'. " | ||
| + "Each entry must contain both '%s' and '%s' keys.", | ||
| i, | ||
| SNAPSHOT_FILTER_TABLE_KEY, | ||
| SNAPSHOT_FILTER_TABLE_KEY, | ||
| SNAPSHOT_FILTER_FILTER_KEY)); | ||
| } | ||
|
|
||
| if (filter == null || filter.trim().isEmpty()) { | ||
| throw new ValidationException( | ||
| String.format( | ||
| "Snapshot filter entry at index %d is missing required key '%s'. " | ||
| + "Each entry must contain both '%s' and '%s' keys.", | ||
| i, | ||
| SNAPSHOT_FILTER_FILTER_KEY, | ||
| SNAPSHOT_FILTER_TABLE_KEY, | ||
| SNAPSHOT_FILTER_FILTER_KEY)); | ||
| } | ||
|
|
||
| // Check for duplicates | ||
| if (result.containsKey(table)) { | ||
| throw new ValidationException( | ||
| String.format( | ||
| "Duplicate table pattern '%s' found in snapshot filters at index %d. " | ||
| + "Each table pattern can only appear once. " | ||
| + "Previous definition: '%s', Current definition: '%s'.", | ||
| table, i, result.get(table), filter)); | ||
| } | ||
|
|
||
| result.put(table, filter); | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| private List<MySqlReadableMetadata> listReadableMetadata(String metadataList) { | ||
| if (StringUtils.isNullOrWhitespaceOnly(metadataList)) { | ||
| return new ArrayList<>(); | ||
|
|
@@ -358,6 +429,7 @@ public Set<ConfigOption<?>> optionalOptions() { | |
| options.add(PARSE_ONLINE_SCHEMA_CHANGES); | ||
| options.add(SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED); | ||
| options.add(SCAN_INCREMENTAL_SNAPSHOT_BACKFILL_SKIP); | ||
| options.add(SCAN_SNAPSHOT_FILTERS); | ||
| return options; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to add a usage example for this feature.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added usage examples in commit cc477d5:
- A "[…] Filters" section with a YAML pipeline example demonstrating the
  `scan.snapshot.filters` list syntax.
- A "[…] Filter" section with both SQL DDL and DataStream API examples
  showing the `scan.snapshot.filter` option usage.