2020import org .apache .flink .api .connector .source .SourceOutput ;
2121import org .apache .flink .cdc .common .event .CreateTableEvent ;
2222import org .apache .flink .cdc .common .event .Event ;
23+ import org .apache .flink .cdc .common .event .SchemaChangeEvent ;
2324import org .apache .flink .cdc .common .schema .Schema ;
24- import org .apache .flink .cdc .common .types .DataType ;
2525import org .apache .flink .cdc .connectors .base .options .StartupOptions ;
2626import org .apache .flink .cdc .connectors .base .source .meta .offset .OffsetFactory ;
2727import org .apache .flink .cdc .connectors .base .source .meta .split .SnapshotSplit ;
2828import org .apache .flink .cdc .connectors .base .source .meta .split .SourceSplitBase ;
2929import org .apache .flink .cdc .connectors .base .source .meta .split .SourceSplitState ;
3030import org .apache .flink .cdc .connectors .base .source .metrics .SourceReaderMetrics ;
31- import org .apache .flink .cdc .connectors .base .source .reader .IncrementalSourceRecordEmitter ;
3231import org .apache .flink .cdc .connectors .postgres .source .PostgresDialect ;
3332import org .apache .flink .cdc .connectors .postgres .source .config .PostgresSourceConfig ;
33+ import org .apache .flink .cdc .connectors .postgres .source .schema .DebeziumSchemaRecord ;
3434import org .apache .flink .cdc .connectors .postgres .source .utils .TableDiscoveryUtils ;
3535import org .apache .flink .cdc .connectors .postgres .utils .PostgresSchemaUtils ;
36- import org .apache .flink .cdc .connectors .postgres .utils .PostgresTypeUtils ;
3736import org .apache .flink .cdc .debezium .DebeziumDeserializationSchema ;
3837import org .apache .flink .cdc .debezium .event .DebeziumEventDeserializationSchema ;
3938import org .apache .flink .connector .base .source .reader .RecordEmitter ;
4039
4140import io .debezium .connector .postgresql .connection .PostgresConnection ;
4241import io .debezium .data .Envelope ;
43- import io .debezium .relational .Column ;
4442import io .debezium .relational .Table ;
4543import io .debezium .relational .TableId ;
4644import io .debezium .relational .history .TableChanges ;
5351import java .util .HashSet ;
5452import java .util .List ;
5553import java .util .Map ;
56- import java .util .Objects ;
5754import java .util .Set ;
5855
5956import static io .debezium .connector .AbstractSourceInfo .SCHEMA_NAME_KEY ;
6259import static org .apache .flink .cdc .connectors .base .utils .SourceRecordUtils .isDataChangeRecord ;
6360import static org .apache .flink .cdc .connectors .base .utils .SourceRecordUtils .isSchemaChangeEvent ;
6461import static org .apache .flink .cdc .connectors .postgres .utils .PostgresSchemaUtils .toCdcTableId ;
62+ import static org .apache .flink .cdc .connectors .postgres .utils .SchemaChangeUtil .inferSchemaChangeEvent ;
63+ import static org .apache .flink .cdc .connectors .postgres .utils .SchemaChangeUtil .toCreateTableEvent ;
6564
6665/** The {@link RecordEmitter} implementation for PostgreSQL pipeline connector. */
67- public class PostgresPipelineRecordEmitter <T > extends IncrementalSourceRecordEmitter <T > {
66+ public class PostgresPipelineRecordEmitter <T > extends PostgresSourceRecordEmitter <T > {
6867 private final PostgresSourceConfig sourceConfig ;
6968 private final PostgresDialect postgresDialect ;
7069
7170 // Used when startup mode is initial
72- private Set <TableId > alreadySendCreateTableTables ;
71+ private final Set <TableId > alreadySendCreateTableTables ;
72+ private final boolean isBounded ;
73+ private final boolean includeDatabaseInTableId ;
74+ private final Map <TableId , CreateTableEvent > createTableEventCache ;
7375
7476 // Used when startup mode is not initial
7577 private boolean shouldEmitAllCreateTableEventsInSnapshotMode = true ;
76- private boolean isBounded = false ;
77- private boolean includeDatabaseInTableId = false ;
78-
79- private final Map <TableId , CreateTableEvent > createTableEventCache ;
8078
8179 public PostgresPipelineRecordEmitter (
82- DebeziumDeserializationSchema debeziumDeserializationSchema ,
80+ DebeziumDeserializationSchema < T > debeziumDeserializationSchema ,
8381 SourceReaderMetrics sourceReaderMetrics ,
8482 PostgresSourceConfig sourceConfig ,
8583 OffsetFactory offsetFactory ,
@@ -108,16 +106,13 @@ public void applySplit(SourceSplitBase split) {
108106 } else {
109107 for (Map .Entry <TableId , TableChanges .TableChange > entry :
110108 split .getTableSchemas ().entrySet ()) {
111- TableId tableId =
112- entry .getKey (); // Use the TableId from the map key which contains full info
113109 TableChanges .TableChange tableChange = entry .getValue ();
110+
111+ // Split serialization currently mismatches the catalog and schema of Table.id(), so the TableId from the map key is used instead.
112+
113+ Table table = tableChange .getTable ();
114114 CreateTableEvent createTableEvent =
115- new CreateTableEvent (
116- toCdcTableId (
117- tableId ,
118- sourceConfig .getDatabaseList ().get (0 ),
119- includeDatabaseInTableId ),
120- buildSchemaFromTable (tableChange .getTable ()));
115+ toCreateTableEvent (entry .getKey (), table , sourceConfig , postgresDialect );
121116 ((DebeziumEventDeserializationSchema ) debeziumDeserializationSchema )
122117 .applyChangeEvent (createTableEvent );
123118 }
@@ -141,60 +136,42 @@ protected void processElement(
141136 sendCreateTableEvent (tableId , (SourceOutput <Event >) output );
142137 alreadySendCreateTableTables .add (tableId );
143138 }
144- } else {
145- boolean isDataChangeRecord = isDataChangeRecord (element );
146- if (isDataChangeRecord || isSchemaChangeEvent (element )) {
147- TableId tableId = getTableId (element );
148- if (!alreadySendCreateTableTables .contains (tableId )) {
149- CreateTableEvent createTableEvent = createTableEventCache .get (tableId );
150- if (createTableEvent != null ) {
151- output .collect ((T ) createTableEvent );
152- }
153- alreadySendCreateTableTables .add (tableId );
154- }
155- // In rare case, we may miss some CreateTableEvents before DataChangeEvents.
156- // Don't send CreateTableEvent for SchemaChangeEvents as it's the latest schema.
157- if (isDataChangeRecord && !createTableEventCache .containsKey (tableId )) {
158- CreateTableEvent createTableEvent = getCreateTableEvent (sourceConfig , tableId );
139+ } else if (isDataChangeRecord (element )) {
140+ TableId tableId = getTableId (element );
141+ if (!alreadySendCreateTableTables .contains (tableId )) {
142+ CreateTableEvent createTableEvent = createTableEventCache .get (tableId );
143+ if (createTableEvent != null ) {
159144 output .collect ((T ) createTableEvent );
160- createTableEventCache .put (tableId , createTableEvent );
161145 }
146+ alreadySendCreateTableTables .add (tableId );
147+ }
148+ // In rare cases, the CreateTableEvent may not have been emitted before the first DataChangeEvent of a table.
149+ // No CreateTableEvent is sent for SchemaChangeEvents because they already carry the latest schema.
150+ if (!createTableEventCache .containsKey (tableId )) {
151+ CreateTableEvent createTableEvent = getCreateTableEvent (sourceConfig , tableId );
152+ output .collect ((T ) createTableEvent );
153+ createTableEventCache .put (tableId , createTableEvent );
162154 }
155+ } else if (isSchemaChangeEvent (element ) && sourceConfig .isSchemaChangeEnabled ()) {
156+ handleSchemaChangeRecord (element , output , splitState );
163157 }
164158 super .processElement (element , output , splitState );
165159 }
166160
167- private Schema buildSchemaFromTable (Table table ) {
168- List <Column > columns = table .columns ();
169- Schema .Builder tableBuilder = Schema .newBuilder ();
170- for (int i = 0 ; i < columns .size (); i ++) {
171- Column column = columns .get (i );
172-
173- String colName = column .name ();
174- DataType dataType ;
175- try (PostgresConnection jdbc = postgresDialect .openJdbcConnection ()) {
176- dataType =
177- PostgresTypeUtils .fromDbzColumn (
178- column ,
179- this .sourceConfig .getDbzConnectorConfig (),
180- jdbc .getTypeRegistry ());
181- }
182- if (!column .isOptional ()) {
183- dataType = dataType .notNull ();
184- }
185- tableBuilder .physicalColumn (
186- colName ,
187- dataType ,
188- column .comment (),
189- column .defaultValueExpression ().orElse (null ));
190- }
191- tableBuilder .comment (table .comment ());
192-
193- List <String > primaryKey = table .primaryKeyColumnNames ();
194- if (Objects .nonNull (primaryKey ) && !primaryKey .isEmpty ()) {
195- tableBuilder .primaryKey (primaryKey );
161+ private void handleSchemaChangeRecord (
162+ SourceRecord element , SourceOutput <T > output , SourceSplitState splitState ) {
163+ Map <TableId , TableChanges .TableChange > existedTableSchemas =
164+ splitState .toSourceSplit ().getTableSchemas ();
165+ DebeziumSchemaRecord schemaRecord = (DebeziumSchemaRecord ) element ;
166+ Table schemaAfter = schemaRecord .getTable ();
167+ Table schemaBefore = null ;
168+ if (existedTableSchemas .containsKey (schemaAfter .id ())) {
169+ schemaBefore = existedTableSchemas .get (schemaAfter .id ()).getTable ();
196170 }
197- return tableBuilder .build ();
171+ List <SchemaChangeEvent > schemaChangeEvents =
172+ inferSchemaChangeEvent (
173+ schemaAfter .id (), schemaBefore , schemaAfter , sourceConfig , postgresDialect );
174+ schemaChangeEvents .forEach (schemaChangeEvent -> output .collect ((T ) schemaChangeEvent ));
198175 }
199176
200177 private void sendCreateTableEvent (TableId tableId , SourceOutput <Event > output ) {
0 commit comments