diff --git a/docs/content/docs/connectors/flink-sources/mysql-cdc.md b/docs/content/docs/connectors/flink-sources/mysql-cdc.md
index 2752298c258..cfe38943737 100644
--- a/docs/content/docs/connectors/flink-sources/mysql-cdc.md
+++ b/docs/content/docs/connectors/flink-sources/mysql-cdc.md
@@ -460,6 +460,13 @@ Only valid for cdc 1.x version. During a snapshot operation, the connector will
For example updating an already updated value in snapshot, or deleting an already deleted entry in snapshot. These replayed change log events should be handled specially.
+
+ | scan.snapshot.filter |
+ optional |
+ (none) |
+ String |
+ When reading a table snapshot, the rows of captured tables will be filtered using the specified filter expression (AKA a SQL WHERE clause), e.g. `id > 100`. By default, no filter is applied, meaning the entire table will be synchronized. |
+
diff --git a/docs/content/docs/connectors/pipeline-connectors/mysql.md b/docs/content/docs/connectors/pipeline-connectors/mysql.md
index a8964cdaffb..c7eab6cb786 100644
--- a/docs/content/docs/connectors/pipeline-connectors/mysql.md
+++ b/docs/content/docs/connectors/pipeline-connectors/mysql.md
@@ -293,6 +293,17 @@ pipeline:
scan.binlog.newly-added-table.enabled: only do binlog-reading for newly added table during binlog reading phase.
+
+ | scan.snapshot.filters |
+ optional |
+ (none) |
+ String |
+ When reading a table snapshot, the rows of captured tables will be filtered using the specified filter expression (AKA a SQL WHERE clause).
+ By default, no filter is applied, meaning the entire table will be synchronized.
+ A colon (:) separates the table name from its filter expression, while a semicolon (;) separates multiple filters,
+ e.g. `db1.user_table_[0-9]+:id > 100;db[1-2].[app|web]_order_\\.*:id < 0;`.
+ |
+
| scan.parse.online.schema.changes.enabled |
optional |
diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/factory/MySqlDataSourceFactory.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/factory/MySqlDataSourceFactory.java
index f522adfdb1a..13418bc52d5 100644
--- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/factory/MySqlDataSourceFactory.java
+++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/factory/MySqlDataSourceFactory.java
@@ -84,6 +84,7 @@
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_NEWLY_ADDED_TABLE_ENABLED;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FETCH_SIZE;
+import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FILTERS;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_MODE;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_FILE;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET;
@@ -285,6 +286,29 @@ public DataSource createDataSource(Context context) {
LOG.info("Add chunkKeyColumn {}.", chunkKeyColumnMap);
configFactory.chunkKeyColumn(chunkKeyColumnMap);
}
+ String snapshotFilters = config.get(SCAN_SNAPSHOT_FILTERS);
+ if (snapshotFilters != null) {
+ Map snapshotFilterMap = new HashMap<>();
+ for (String snapshotFilter : snapshotFilters.split("(? readableMetadataList = listReadableMetadata(metadataList);
return new MySqlDataSource(configFactory, readableMetadataList);
@@ -352,6 +376,7 @@ public Set> optionalOptions() {
options.add(CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND);
options.add(CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND);
options.add(SCAN_BINLOG_NEWLY_ADDED_TABLE_ENABLED);
+ options.add(SCAN_SNAPSHOT_FILTERS);
options.add(METADATA_LIST);
options.add(INCLUDE_COMMENTS_ENABLED);
options.add(USE_LEGACY_JSON_FORMAT);
diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceOptions.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceOptions.java
index 6aff556e7fa..d2845996d0a 100644
--- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceOptions.java
+++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceOptions.java
@@ -273,6 +273,17 @@ public class MySqlDataSourceOptions {
+ "scan.newly-added-table.enabled: do re-snapshot & binlog-reading for newly added table when restored; \n"
+ "scan.binlog.newly-added-table.enabled: only do binlog-reading for newly added table during binlog reading phase.");
+ @Experimental
+ public static final ConfigOption SCAN_SNAPSHOT_FILTERS =
+ ConfigOptions.key("scan.snapshot.filters")
+ .stringType()
+ .noDefaultValue()
+ .withDescription(
+ "When reading a table snapshot, the rows of captured tables will be filtered using the specified filter expression (AKA a SQL WHERE clause). "
+ + "By default, no filter is applied, meaning the entire table will be synchronized. "
+ + "A colon (:) separates table name and filter expression, while a semicolon (;) separates multiple filters, "
+ + "e.g. `db1.user_table_[0-9]+:id > 100;db[1-2].[app|web]_order_\\.*:id < 0;`.");
+
@Experimental
public static final ConfigOption METADATA_LIST =
ConfigOptions.key("metadata.list")
diff --git a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/test/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/test/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceFactoryTest.java
index 74f3ef52a3c..85d06a2d712 100644
--- a/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/test/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceFactoryTest.java
+++ b/flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-mysql/src/test/java/org/apache/flink/cdc/connectors/mysql/source/MySqlDataSourceFactoryTest.java
@@ -43,6 +43,7 @@
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_BINLOG_NEWLY_ADDED_TABLE_ENABLED;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_UNBOUNDED_CHUNK_FIRST_ENABLED;
+import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.SCAN_SNAPSHOT_FILTERS;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TABLES;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TABLES_EXCLUDE;
import static org.apache.flink.cdc.connectors.mysql.source.MySqlDataSourceOptions.TREAT_TINYINT1_AS_BOOLEAN_ENABLED;
@@ -330,6 +331,39 @@ void testAddChunkKeyColumns() {
});
}
+ @Test
+ public void testAddSnapshotFilters() {
+ inventoryDatabase.createAndInitialize();
+ Map options = new HashMap<>();
+ options.put(HOSTNAME.key(), MYSQL_CONTAINER.getHost());
+ options.put(PORT.key(), String.valueOf(MYSQL_CONTAINER.getDatabasePort()));
+ options.put(USERNAME.key(), TEST_USER);
+ options.put(PASSWORD.key(), TEST_PASSWORD);
+ options.put(TABLES.key(), inventoryDatabase.getDatabaseName() + ".\\.*");
+ options.put(
+ SCAN_SNAPSHOT_FILTERS.key(),
+ inventoryDatabase.getDatabaseName()
+ + ".multi_max_\\.*:id > 200;"
+ + inventoryDatabase.getDatabaseName()
+ + ".products:1 = 0;");
+ Factory.Context context = new MockContext(Configuration.fromMap(options));
+
+ MySqlDataSourceFactory factory = new MySqlDataSourceFactory();
+ MySqlDataSource dataSource = (MySqlDataSource) factory.createDataSource(context);
+
+ assertThat(dataSource.getSourceConfig().getSnapshotFilters())
+ .isNotEmpty()
+ .isEqualTo(
+ new HashMap() {
+ {
+ put(
+ inventoryDatabase.getDatabaseName() + ".multi_max_\\.*",
+ "id > 200");
+ put(inventoryDatabase.getDatabaseName() + ".products", "1 = 0");
+ }
+ });
+ }
+
class MockContext implements Factory.Context {
Configuration factoryConfiguration;
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/debezium/task/MySqlSnapshotSplitReadTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/debezium/task/MySqlSnapshotSplitReadTask.java
index f729f59d05f..c907d477780 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/debezium/task/MySqlSnapshotSplitReadTask.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/debezium/task/MySqlSnapshotSplitReadTask.java
@@ -24,6 +24,7 @@
import org.apache.flink.cdc.connectors.mysql.source.config.MySqlSourceConfig;
import org.apache.flink.cdc.connectors.mysql.source.offset.BinlogOffset;
import org.apache.flink.cdc.connectors.mysql.source.split.MySqlSnapshotSplit;
+import org.apache.flink.cdc.connectors.mysql.source.utils.SnapshotFilterUtils;
import org.apache.flink.cdc.connectors.mysql.source.utils.StatementUtils;
import org.apache.flink.cdc.connectors.mysql.source.utils.hooks.SnapshotPhaseHooks;
@@ -242,12 +243,24 @@ private void createDataEventsForTable(
long exportStart = clock.currentTimeInMillis();
LOG.info("Exporting data from split '{}' of table {}", snapshotSplit.splitId(), table.id());
+ String filter =
+ SnapshotFilterUtils.getSnapshotFilter(
+ sourceConfig.getSnapshotFilters(), table.id());
+ if (filter != null) {
+ LOG.info(
+ "Filter for split '{}' of table {} is: {}",
+ snapshotSplit.splitId(),
+ table.id(),
+ filter);
+ }
+
final String selectSql =
StatementUtils.buildSplitScanQuery(
snapshotSplit.getTableId(),
snapshotSplit.getSplitKeyType(),
snapshotSplit.getSplitStart() == null,
- snapshotSplit.getSplitEnd() == null);
+ snapshotSplit.getSplitEnd() == null,
+ filter);
LOG.info(
"For split '{}' of table {} using select statement: '{}'",
snapshotSplit.splitId(),
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlSourceBuilder.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlSourceBuilder.java
index caf316d1b4a..349a320ebd3 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlSourceBuilder.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/MySqlSourceBuilder.java
@@ -292,6 +292,15 @@ public MySqlSourceBuilder assignUnboundedChunkFirst(boolean assignUnboundedCh
return this;
}
+ /**
+ * Adds a snapshot filter for {@code table} by delegating to the config factory: rows read during
+ * that table's snapshot are filtered by {@code filter} (AKA a SQL WHERE clause). Returns this.
+ */
+ public MySqlSourceBuilder snapshotFilters(String table, String filter) {
+ this.configFactory.snapshotFilters(table, filter);
+ return this;
+ }
+
/**
* Build the {@link MySqlSource}.
*
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java
index 4821eaba2ea..965831c28bf 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java
@@ -26,6 +26,7 @@
import org.apache.flink.cdc.connectors.mysql.source.split.MySqlSnapshotSplit;
import org.apache.flink.cdc.connectors.mysql.source.utils.ChunkUtils;
import org.apache.flink.cdc.connectors.mysql.source.utils.ObjectUtils;
+import org.apache.flink.cdc.connectors.mysql.source.utils.SnapshotFilterUtils;
import org.apache.flink.cdc.connectors.mysql.source.utils.StatementUtils;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
@@ -148,12 +149,16 @@ private void analyzeTable(MySqlPartition partition, TableId tableId) {
splitColumn =
ChunkUtils.getChunkKeyColumn(
currentSplittingTable, sourceConfig.getChunkKeyColumns());
+ String filter =
+ SnapshotFilterUtils.getSnapshotFilter(
+ sourceConfig.getSnapshotFilters(), tableId);
splitType =
ChunkUtils.getChunkKeyColumnType(
splitColumn, sourceConfig.isTreatTinyInt1AsBoolean());
minMaxOfSplitColumn =
- StatementUtils.queryMinMax(jdbcConnection, tableId, splitColumn.name());
- approximateRowCnt = StatementUtils.queryApproximateRowCnt(jdbcConnection, tableId);
+ StatementUtils.queryMinMax(jdbcConnection, tableId, splitColumn.name(), filter);
+ approximateRowCnt =
+ StatementUtils.queryRowCnt(jdbcConnection, tableId, splitColumn.name(), filter);
} catch (Exception e) {
throw new RuntimeException("Fail to analyze table in chunk splitter.", e);
}
@@ -171,6 +176,8 @@ private MySqlSnapshotSplit splitOneUnevenlySizedChunk(MySqlPartition partition,
nextChunkStart == ChunkSplitterState.ChunkBound.START_BOUND
? "null"
: chunkStartVal.toString());
+ String filter =
+ SnapshotFilterUtils.getSnapshotFilter(sourceConfig.getSnapshotFilters(), tableId);
// we start from [null, min + chunk_size) and avoid [null, min)
Object chunkEnd =
nextChunkEnd(
@@ -181,7 +188,8 @@ private MySqlSnapshotSplit splitOneUnevenlySizedChunk(MySqlPartition partition,
tableId,
splitColumn.name(),
minMaxOfSplitColumn[1],
- chunkSize);
+ chunkSize,
+ filter);
// may sleep a while to avoid DDOS on MySQL server
maySleep(nextChunkId, tableId);
if (chunkEnd != null && ObjectUtils.compare(chunkEnd, minMaxOfSplitColumn[1]) <= 0) {
@@ -316,7 +324,6 @@ public List splitEvenlySizedChunks(
break;
}
}
-
// add the ending split
splits.add(ChunkRange.of(chunkStart, null));
return splits;
@@ -328,16 +335,17 @@ private Object nextChunkEnd(
TableId tableId,
String splitColumnName,
Object max,
- int chunkSize)
+ int chunkSize,
+ @Nullable String filter)
throws SQLException {
// chunk end might be null when max values are removed
Object chunkEnd =
StatementUtils.queryNextChunkMax(
- jdbc, tableId, splitColumnName, chunkSize, previousChunkEnd);
+ jdbc, tableId, splitColumnName, chunkSize, previousChunkEnd, filter);
if (Objects.equals(previousChunkEnd, chunkEnd)) {
// we don't allow equal chunk start and end,
// should query the next one larger than chunkEnd
- chunkEnd = StatementUtils.queryMin(jdbc, tableId, splitColumnName, chunkEnd);
+ chunkEnd = StatementUtils.queryMin(jdbc, tableId, splitColumnName, chunkEnd, filter);
// queryMin will return null when the chunkEnd is the max value,
// this will happen when the mysql table ignores the capitalization.
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfig.java
index 260a7cd2b5d..ac2f6eb3538 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfig.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfig.java
@@ -67,6 +67,7 @@ public class MySqlSourceConfig implements Serializable {
private final Properties jdbcProperties;
private final Map chunkKeyColumns;
private final boolean skipSnapshotBackfill;
+ private final Map snapshotFilters;
private final boolean parseOnLineSchemaChanges;
public static boolean useLegacyJsonFormat = true;
private final boolean assignUnboundedChunkFirst;
@@ -108,7 +109,8 @@ public class MySqlSourceConfig implements Serializable {
boolean parseOnLineSchemaChanges,
boolean treatTinyInt1AsBoolean,
boolean useLegacyJsonFormat,
- boolean assignUnboundedChunkFirst) {
+ boolean assignUnboundedChunkFirst,
+ Map snapshotFilters) {
this.hostname = checkNotNull(hostname);
this.port = port;
this.username = checkNotNull(username);
@@ -148,6 +150,7 @@ public class MySqlSourceConfig implements Serializable {
this.jdbcProperties = jdbcProperties;
this.chunkKeyColumns = chunkKeyColumns;
this.skipSnapshotBackfill = skipSnapshotBackfill;
+ this.snapshotFilters = snapshotFilters;
this.parseOnLineSchemaChanges = parseOnLineSchemaChanges;
this.treatTinyInt1AsBoolean = treatTinyInt1AsBoolean;
this.useLegacyJsonFormat = useLegacyJsonFormat;
@@ -285,4 +288,8 @@ public boolean isSkipSnapshotBackfill() {
public boolean isTreatTinyInt1AsBoolean() {
return treatTinyInt1AsBoolean;
}
+
+ public Map getSnapshotFilters() {
+ return snapshotFilters;
+ }
}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java
index 427115edea7..f100c872c57 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java
@@ -70,6 +70,7 @@ public class MySqlSourceConfigFactory implements Serializable {
private Properties dbzProperties;
private Map chunkKeyColumns = new HashMap<>();
private boolean skipSnapshotBackfill = false;
+ private Map snapshotFilters = new HashMap<>();
private boolean parseOnLineSchemaChanges = false;
private boolean treatTinyInt1AsBoolean = true;
private boolean useLegacyJsonFormat = true;
@@ -304,6 +305,24 @@ public MySqlSourceConfigFactory closeIdleReaders(boolean closeIdleReaders) {
return this;
}
+ /**
+ * Stores {@code filter} under the key {@code table}, replacing any filter already stored for that
+ * key. When reading a table snapshot, rows are filtered by it (AKA a SQL WHERE clause).
+ */
+ public MySqlSourceConfigFactory snapshotFilters(String table, String filter) {
+ this.snapshotFilters.put(table, filter);
+ return this;
+ }
+
+ /**
+ * Merges all entries of {@code snapshotFilters} (table key to filter expression, AKA a SQL WHERE
+ * clause) into this factory's filters; entries with an existing key overwrite the previous one.
+ */
+ public MySqlSourceConfigFactory snapshotFilters(Map snapshotFilters) {
+ this.snapshotFilters.putAll(snapshotFilters);
+ return this;
+ }
+
/** Whether to parse gh-ost/pt-osc utility generated schema change events. Defaults to false. */
public MySqlSourceConfigFactory parseOnLineSchemaChanges(boolean parseOnLineSchemaChanges) {
this.parseOnLineSchemaChanges = parseOnLineSchemaChanges;
@@ -421,6 +440,7 @@ public MySqlSourceConfig createConfig(int subtaskId, String serverName) {
parseOnLineSchemaChanges,
treatTinyInt1AsBoolean,
useLegacyJsonFormat,
- assignUnboundedChunkFirst);
+ assignUnboundedChunkFirst,
+ snapshotFilters);
}
}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceOptions.java
index a8e143f5fc5..673d39396d6 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceOptions.java
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/config/MySqlSourceOptions.java
@@ -270,6 +270,15 @@ public class MySqlSourceOptions {
.withDescription(
"Whether to skip backfill in snapshot reading phase. If backfill is skipped, changes on captured tables during snapshot phase will be consumed later in binlog reading phase instead of being merged into the snapshot. WARNING: Skipping backfill might lead to data inconsistency because some binlog events happened within the snapshot phase might be replayed (only at-least-once semantic is promised). For example updating an already updated value in snapshot, or deleting an already deleted entry in snapshot. These replayed binlog events should be handled specially.");
+ @Experimental
+ public static final ConfigOption SCAN_SNAPSHOT_FILTER =
+ ConfigOptions.key("scan.snapshot.filter")
+ .stringType()
+ .noDefaultValue()
+ .withDescription(
+ "When reading a table snapshot, the rows of captured tables will be filtered using the specified filter expression (AKA a SQL WHERE clause). "
+ + "By default, no filter is applied, meaning the entire table will be synchronized. e.g. `id > 100`");
+
@Experimental
public static final ConfigOption PARSE_ONLINE_SCHEMA_CHANGES =
ConfigOptions.key("scan.parse.online.schema.changes.enabled")
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/utils/SnapshotFilterUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/utils/SnapshotFilterUtils.java
new file mode 100644
index 00000000000..7ad16d5beab
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mysql-cdc/src/main/java/org/apache/flink/cdc/connectors/mysql/source/utils/SnapshotFilterUtils.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.mysql.source.utils;
+
+import org.apache.flink.cdc.common.schema.Selectors;
+
+import io.debezium.relational.TableId;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Utilities to filter snapshot of table. */
+public class SnapshotFilterUtils {
+
+ private SnapshotFilterUtils() {}
+
+ private static final Map