textStyles = EnumSet.noneOf(TextStyle.class);
+
+    /**
+     * Builds a block of text from a format string containing "%s" placeholders, each of which is
+     * replaced with the string representation of one of the given {@link InlineElement}s. For
+     * example:
+     *
+     * <p>{@code text("This is a text with a link %s", link("https://somepage", "to here"))}
+     *
+     * @param format text with placeholders for elements
+     * @param elements elements to be put in the text
+     * @return block of text
+     */
+    public static TextElement text(String format, InlineElement... elements) {
+        final List<InlineElement> inlineElements = Arrays.asList(elements);
+        return new TextElement(format, inlineElements);
+    }
+
+    /**
+     * Builds a plain block of text that carries no inline elements.
+     *
+     * @param text a simple block of text
+     * @return block of text
+     */
+    public static TextElement text(String text) {
+        final List<InlineElement> noElements = Collections.emptyList();
+        return new TextElement(text, noElements);
+    }
+
+    /**
+     * Wraps a list of {@link InlineElement}s into a single {@link TextElement}.
+     *
+     * <p>The format string is one "%s" placeholder per element, with nothing in between.
+     *
+     * @param elements elements to wrap
+     * @return a text element containing all given elements
+     */
+    public static InlineElement wrap(InlineElement... elements) {
+        final String placeholders = Strings.repeat("%s", elements.length);
+        return text(placeholders, elements);
+    }
+
+    /**
+     * Builds a block of text that carries the {@link TextStyle#CODE} style.
+     *
+     * @param text a block of text that will be formatted as code
+     * @return block of text formatted as code
+     */
+    public static TextElement code(String text) {
+        final TextElement codeElement = text(text);
+        codeElement.textStyles.add(TextStyle.CODE);
+        return codeElement;
+    }
+
+    /** Returns the format string this element was created with. */
+    public String getFormat() {
+        return this.format;
+    }
+
+    /**
+     * Returns the inline elements this element was created with.
+     *
+     * <p>The element type is spelled out so that callers do not have to deal with a raw list.
+     */
+    public List<InlineElement> getElements() {
+        return elements;
+    }
+
+    /**
+     * Returns the styles applied to this text.
+     *
+     * <p>The returned set is the element's own set, not a copy.
+     */
+    public EnumSet<TextStyle> getStyles() {
+        return textStyles;
+    }
+
+    /**
+     * Creates a text element; instances are obtained through the static factory methods.
+     *
+     * @param format text, possibly containing "%s" placeholders
+     * @param elements inline elements associated with the format
+     */
+    private TextElement(String format, List<InlineElement> elements) {
+        this.format = format;
+        this.elements = elements;
+    }
+
+    /**
+     * Hands this element to the given formatter.
+     *
+     * @param formatter formatter that receives this element
+     */
+    @Override
+    public void format(Formatter formatter) {
+        formatter.format(this);
+    }
+
+    /** Styles that can be applied to {@link TextElement} e.g. code, bold etc. */
+    @PublicEvolving
+    public enum TextStyle {
+        /** The text is formatted as code. */
+        CODE
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryArrayData.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryArrayData.java
new file mode 100644
index 00000000000..991085ae226
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryArrayData.java
@@ -0,0 +1,623 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.data.ArrayData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.LocalZonedTimestampData;
+import org.apache.flink.cdc.common.data.MapData;
+import org.apache.flink.cdc.common.data.RecordData;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.data.ZonedTimestampData;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.utils.DataTypeUtils;
+import org.apache.flink.core.memory.MemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+
+import java.lang.reflect.Array;
+
+import static org.apache.flink.core.memory.MemoryUtils.UNSAFE;
+
+/**
+ * A binary implementation of {@link ArrayData} which is backed by {@link MemorySegment}s.
+ *
+ * <p>This class provides a way to store array data in a binary format that is compact and
+ * efficient. It uses {@link MemorySegment}s to manage the binary representation of the data,
+ * allowing for efficient storage and access.
+ *
+ * <p>The binary layout of {@link BinaryArrayData} is structured as follows:
+ *
+ * <pre>
+ * [size(int)] + [null bits(4-byte word boundaries)] + [values or offset&length] + [variable length part].
+ * </pre>
+ *
+ * <ul>
+ *   <li><b>size</b>: The first 4 bytes store the number of elements in the array.
+ *   <li><b>null bits</b>: A bitmap to track null values, aligned to 4-byte word boundaries. Each
+ *       bit represents whether an element is null.
+ *   <li><b>values or offset&length</b>: The values of the array elements. For fixed-length
+ *       primitive types, the values are stored directly. For variable-length types (e.g., strings,
+ *       maps), this part stores the offset and length of the actual data in the variable length
+ *       part.
+ *   <li><b>variable length part</b>: This part of the memory segment stores the actual data for
+ *       variable-length types (e.g., strings, maps).
+ * </ul>
+ *
+ * <p>The header size is calculated based on the number of elements in the array, ensuring efficient
+ * alignment and access.
+ *
+ * <p>For fields that hold fixed-length primitive types, such as long, double, or int, they are
+ * stored compactly in bytes, just like the original Java array.
+ *
+ * <p>The class also provides methods to convert the binary data back into Java primitive arrays,
+ * handling various types such as boolean, byte, short, int, long, float, and double.
+ */
+public final class BinaryArrayData extends BinarySection implements ArrayData {
+
+ /**
+ * Base offset of a {@code byte[]}, as reported by {@code UNSAFE.arrayBaseOffset}. Used as the
+ * starting address when raw bytes are copied into or out of a byte array.
+ */
+ private static final int BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
+
+ // Base offsets of the remaining primitive array types, obtained the same way. They are passed
+ // to the raw-memory copies in the to*Array() and fromPrimitiveArray() methods.
+ private static final int BOOLEAN_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(boolean[].class);
+ private static final int SHORT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(short[].class);
+ private static final int INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class);
+ private static final int LONG_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(long[].class);
+ private static final int FLOAT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(float[].class);
+ private static final int DOUBLE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(double[].class);
+
+    /**
+     * Calculates the size of the header in bytes for an array with the specified number of
+     * elements.
+     *
+     * <p>The header consists of:
+     *
+     * <ul>
+     *   <li>4 bytes to store the size of the array (number of elements).
+     *   <li>A bitmap to track null values, one bit per element, rounded up to a whole number of
+     *       4-byte words.
+     * </ul>
+     *
+     * <p>The formula is:
+     *
+     * <pre>
+     * header size = 4 bytes (for array size) + ((numFields + 31) / 32) * 4 bytes (for null bitmap)
+     * </pre>
+     *
+     * @param numFields the number of elements in the array
+     * @return the size of the header in bytes
+     */
+    public static int calculateHeaderInBytes(int numFields) {
+        // Number of 32-bit words needed to hold one null bit per element.
+        final int nullBitmapWords = (numFields + 31) / 32;
+        return 4 + nullBitmapWords * 4;
+    }
+
+    /**
+     * Returns the number of bytes one element of the given type occupies in the fixed-length part
+     * of the array.
+     *
+     * <p>Primitive types store their value directly. Other supported types (strings, binaries,
+     * decimals, timestamps, arrays, maps, rows) use an 8-byte slot.
+     *
+     * @param type the element type
+     * @return the slot size in bytes: 1, 2, 4 or 8
+     * @throws UnsupportedOperationException for {@code TIMESTAMP_WITH_TIME_ZONE}
+     * @throws IllegalArgumentException for any other type root not listed here
+     */
+    public static int calculateFixLengthPartSize(DataType type) {
+        // Cases are grouped by slot size.
+        switch (type.getTypeRoot()) {
+            case BOOLEAN:
+            case TINYINT:
+                return 1;
+            case SMALLINT:
+                return 2;
+            case INTEGER:
+            case FLOAT:
+            case DATE:
+            case TIME_WITHOUT_TIME_ZONE:
+                return 4;
+            case CHAR:
+            case VARCHAR:
+            case BINARY:
+            case VARBINARY:
+            case DECIMAL:
+            case BIGINT:
+            case DOUBLE:
+            case TIMESTAMP_WITHOUT_TIME_ZONE:
+            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+            case ARRAY:
+            case MAP:
+            case ROW:
+                // long and double are 8 bytes;
+                // otherwise it stores the length and offset of the variable-length part for
+                // types such as string, map, etc.
+                return 8;
+            case TIMESTAMP_WITH_TIME_ZONE:
+                throw new UnsupportedOperationException(
+                        "Unsupported element type for binary array: " + type.getTypeRoot());
+            default:
+                throw new IllegalArgumentException(
+                        "Unknown element type for binary array: " + type.getTypeRoot());
+        }
+    }
+
+ /** The number of elements in this array; read from the first 4 bytes in {@code pointTo}. */
+ private int size;
+
+ /**
+ * The position to start storing array elements: the array's start offset plus the header size,
+ * computed in {@code pointTo}.
+ */
+ private int elementOffset;
+
+    /** Creates an unbound instance; {@code pointTo} attaches it to memory. */
+    public BinaryArrayData() {}
+
+    /** Asserts (only when assertions are enabled) that the index lies in {@code [0, size)}. */
+    private void assertIndexIsValid(int ordinal) {
+        assert ordinal >= 0 : "index (" + ordinal + ") should >= 0";
+        assert ordinal < size : "index (" + ordinal + ") should < " + size;
+    }
+
+ private int getElementOffset(int ordinal, int elementSize) {
+ return elementOffset + ordinal * elementSize;
+ }
+
+    /** Returns the number of elements in this array. */
+    @Override
+    public int size() {
+        return this.size;
+    }
+
+    /**
+     * Binds this array to the given memory region and derives the element count and the offset of
+     * the first element from it.
+     */
+    @Override
+    public void pointTo(MemorySegment[] segments, int offset, int sizeInBytes) {
+        // The element count lives in the first 4 bytes of the region.
+        final int elementCount = BinarySegmentUtils.getInt(segments, offset);
+        assert elementCount >= 0 : "size (" + elementCount + ") should >= 0";
+
+        this.size = elementCount;
+        super.pointTo(segments, offset, sizeInBytes);
+        // Elements start right after the header (size word + null bitmap).
+        this.elementOffset = offset + calculateHeaderInBytes(elementCount);
+    }
+
+    /** Returns whether the null bit of the element at {@code pos} is set. */
+    @Override
+    public boolean isNullAt(int pos) {
+        assertIndexIsValid(pos);
+        // The null bitmap starts right after the 4-byte size word.
+        final int nullBitsOffset = offset + 4;
+        return BinarySegmentUtils.bitGet(segments, nullBitsOffset, pos);
+    }
+
+    /** Sets the null bit of the element at {@code pos}; the element's slot is left untouched. */
+    public void setNullAt(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+    }
+
+    /** Clears the null bit of the element at {@code pos}. */
+    public void setNotNullAt(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitUnSet(segments, nullBitsOffset, pos);
+    }
+
+    /** Reads the 8-byte slot of the element at {@code pos} as a long. */
+    @Override
+    public long getLong(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Long.BYTES);
+        return BinarySegmentUtils.getLong(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the long into its 8-byte slot. */
+    public void setLong(int pos, long value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Long.BYTES);
+        BinarySegmentUtils.setLong(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 8-byte slot. */
+    public void setNullLong(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Long.BYTES);
+        BinarySegmentUtils.setLong(segments, slotOffset, 0L);
+    }
+
+    /** Reads the 4-byte slot of the element at {@code pos} as an int. */
+    @Override
+    public int getInt(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Integer.BYTES);
+        return BinarySegmentUtils.getInt(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the int into its 4-byte slot. */
+    public void setInt(int pos, int value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Integer.BYTES);
+        BinarySegmentUtils.setInt(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 4-byte slot. */
+    public void setNullInt(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Integer.BYTES);
+        BinarySegmentUtils.setInt(segments, slotOffset, 0);
+    }
+
+    /**
+     * Reads the string at {@code pos}. The element's 8-byte slot is read as a long and handed to
+     * {@code BinarySegmentUtils.readStringData} together with the slot's own offset.
+     */
+    @Override
+    public StringData getString(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, 8);
+        final long slotValue = BinarySegmentUtils.getLong(segments, slotOffset);
+        return BinarySegmentUtils.readStringData(segments, offset, slotOffset, slotValue);
+    }
+
+    /**
+     * Reads the decimal at {@code pos}.
+     *
+     * <p>For compact precisions the 8-byte slot holds the unscaled long itself; otherwise the slot
+     * value is passed to {@code BinarySegmentUtils.readDecimalData} as offset-and-size.
+     */
+    @Override
+    public DecimalData getDecimal(int pos, int precision, int scale) {
+        assertIndexIsValid(pos);
+        // Both layouts start by reading the same 8-byte slot.
+        final long slotValue = BinarySegmentUtils.getLong(segments, getElementOffset(pos, 8));
+        if (!DecimalData.isCompact(precision)) {
+            return BinarySegmentUtils.readDecimalData(
+                    segments, offset, slotValue, precision, scale);
+        }
+        return DecimalData.fromUnscaledLong(slotValue, precision, scale);
+    }
+
+    /**
+     * Reads the timestamp at {@code pos}.
+     *
+     * <p>For compact precisions the 8-byte slot holds the milliseconds; otherwise the slot value
+     * is passed to {@code BinarySegmentUtils.readTimestampData} as offset-and-nanoOfMilli.
+     */
+    @Override
+    public TimestampData getTimestamp(int pos, int precision) {
+        assertIndexIsValid(pos);
+
+        // Both layouts start by reading the same 8-byte slot.
+        final long slotValue = BinarySegmentUtils.getLong(segments, getElementOffset(pos, 8));
+        if (!TimestampData.isCompact(precision)) {
+            return BinarySegmentUtils.readTimestampData(segments, offset, slotValue);
+        }
+        return TimestampData.fromMillis(slotValue);
+    }
+
+    /**
+     * Not supported by this implementation.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public LocalZonedTimestampData getLocalZonedTimestamp(int pos, int precision) {
+        throw new UnsupportedOperationException("Not support LocalZonedTimestampData");
+    }
+
+    /**
+     * Not supported by this implementation.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public ZonedTimestampData getZonedTimestamp(int pos, int precision) {
+        throw new UnsupportedOperationException("Not support ZonedTimestampData");
+    }
+
+    /**
+     * Reads the binary value at {@code pos}. The element's 8-byte slot is read as a long and
+     * handed to {@code BinarySegmentUtils.readBinary} together with the slot's own offset.
+     */
+    @Override
+    public byte[] getBinary(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, 8);
+        final long slotValue = BinarySegmentUtils.getLong(segments, slotOffset);
+        return BinarySegmentUtils.readBinary(segments, offset, slotOffset, slotValue);
+    }
+
+    /** Reads the nested array at {@code pos} from the offset-and-size held in its 8-byte slot. */
+    @Override
+    public ArrayData getArray(int pos) {
+        assertIndexIsValid(pos);
+        final long offsetAndSize = getLong(pos);
+        return BinarySegmentUtils.readArrayData(segments, offset, offsetAndSize);
+    }
+
+    /** Reads the nested map at {@code pos} from the offset-and-size held in its 8-byte slot. */
+    @Override
+    public MapData getMap(int pos) {
+        assertIndexIsValid(pos);
+        final long offsetAndSize = getLong(pos);
+        return BinarySegmentUtils.readMapData(segments, offset, offsetAndSize);
+    }
+
+    /**
+     * Reads the nested record at {@code pos} from the offset-and-size held in its 8-byte slot.
+     *
+     * @param numFields number of fields of the nested record
+     */
+    @Override
+    public RecordData getRecord(int pos, int numFields) {
+        assertIndexIsValid(pos);
+        final long offsetAndSize =
+                BinarySegmentUtils.getLong(segments, getElementOffset(pos, 8));
+        return BinarySegmentUtils.readRecordData(segments, numFields, offset, offsetAndSize);
+    }
+
+    /** Reads the 1-byte slot of the element at {@code pos} as a boolean. */
+    @Override
+    public boolean getBoolean(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, 1);
+        return BinarySegmentUtils.getBoolean(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the boolean into its slot. */
+    public void setBoolean(int pos, boolean value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, 1);
+        BinarySegmentUtils.setBoolean(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and resets its slot to {@code false}. */
+    public void setNullBoolean(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, 1);
+        BinarySegmentUtils.setBoolean(segments, slotOffset, false);
+    }
+
+    /** Reads the 1-byte slot of the element at {@code pos}. */
+    @Override
+    public byte getByte(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Byte.BYTES);
+        return BinarySegmentUtils.getByte(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the byte into its slot. */
+    public void setByte(int pos, byte value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Byte.BYTES);
+        BinarySegmentUtils.setByte(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 1-byte slot. */
+    public void setNullByte(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Byte.BYTES);
+        BinarySegmentUtils.setByte(segments, slotOffset, (byte) 0);
+    }
+
+    /** Reads the 2-byte slot of the element at {@code pos} as a short. */
+    @Override
+    public short getShort(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Short.BYTES);
+        return BinarySegmentUtils.getShort(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the short into its 2-byte slot. */
+    public void setShort(int pos, short value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Short.BYTES);
+        BinarySegmentUtils.setShort(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 2-byte slot. */
+    public void setNullShort(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Short.BYTES);
+        BinarySegmentUtils.setShort(segments, slotOffset, (short) 0);
+    }
+
+    /** Reads the 4-byte slot of the element at {@code pos} as a float. */
+    @Override
+    public float getFloat(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Float.BYTES);
+        return BinarySegmentUtils.getFloat(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the float into its 4-byte slot. */
+    public void setFloat(int pos, float value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Float.BYTES);
+        BinarySegmentUtils.setFloat(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 4-byte slot. */
+    public void setNullFloat(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Float.BYTES);
+        BinarySegmentUtils.setFloat(segments, slotOffset, 0F);
+    }
+
+    /** Reads the 8-byte slot of the element at {@code pos} as a double. */
+    @Override
+    public double getDouble(int pos) {
+        assertIndexIsValid(pos);
+        final int slotOffset = getElementOffset(pos, Double.BYTES);
+        return BinarySegmentUtils.getDouble(segments, slotOffset);
+    }
+
+    /** Marks the element at {@code pos} as not null and writes the double into its 8-byte slot. */
+    public void setDouble(int pos, double value) {
+        assertIndexIsValid(pos);
+        setNotNullAt(pos);
+        final int slotOffset = getElementOffset(pos, Double.BYTES);
+        BinarySegmentUtils.setDouble(segments, slotOffset, value);
+    }
+
+    /** Sets the null bit of the element at {@code pos} and zeroes its 8-byte slot. */
+    public void setNullDouble(int pos) {
+        assertIndexIsValid(pos);
+        final int nullBitsOffset = offset + 4;
+        BinarySegmentUtils.bitSet(segments, nullBitsOffset, pos);
+        final int slotOffset = getElementOffset(pos, Double.BYTES);
+        BinarySegmentUtils.setDouble(segments, slotOffset, 0.0);
+    }
+
+    /**
+     * Overwrites the decimal at {@code pos}.
+     *
+     * <p>For compact precisions the unscaled long is written directly into the 8-byte slot.
+     * Otherwise the upper 32 bits of the slot hold the offset (relative to this array's start) of
+     * a 16-byte region that receives the unscaled bytes, and the lower 32 bits hold their length.
+     *
+     * <p>A {@code null} value marks the element as null in both layouts. In the non-compact
+     * layout the region offset is kept in the slot so the element can be updated again later.
+     *
+     * @param pos element index
+     * @param value the decimal to store, or {@code null} to mark the element as null
+     * @param precision precision of the decimal type; decides which layout is used
+     */
+    public void setDecimal(int pos, DecimalData value, int precision) {
+        assertIndexIsValid(pos);
+
+        if (DecimalData.isCompact(precision)) {
+            // compact format
+            if (value == null) {
+                // Treat null like the non-compact branch does instead of dereferencing it.
+                setNullLong(pos);
+            } else {
+                setLong(pos, value.toUnscaledLong());
+            }
+            return;
+        }
+
+        int fieldOffset = getElementOffset(pos, 8);
+        int cursor = (int) (BinarySegmentUtils.getLong(segments, fieldOffset) >>> 32);
+        assert cursor > 0 : "invalid cursor " + cursor;
+        // zero-out the bytes
+        BinarySegmentUtils.setLong(segments, offset + cursor, 0L);
+        BinarySegmentUtils.setLong(segments, offset + cursor + 8, 0L);
+
+        if (value == null) {
+            setNullAt(pos);
+            // keep the offset for future update
+            BinarySegmentUtils.setLong(segments, fieldOffset, ((long) cursor) << 32);
+        } else {
+            byte[] bytes = value.toUnscaledBytes();
+            assert (bytes.length <= 16);
+
+            // Write the bytes to the variable length portion.
+            BinarySegmentUtils.copyFromBytes(segments, offset + cursor, bytes, 0, bytes.length);
+            setLong(pos, ((long) cursor << 32) | ((long) bytes.length));
+        }
+    }
+
+    /**
+     * Overwrites the timestamp at {@code pos}.
+     *
+     * <p>For compact precisions the milliseconds are written directly into the 8-byte slot.
+     * Otherwise the upper 32 bits of the slot hold the offset (relative to this array's start) of
+     * an 8-byte region that receives the milliseconds, and the lower 32 bits hold the
+     * nano-of-millisecond.
+     *
+     * <p>A {@code null} value marks the element as null in both layouts. In the non-compact
+     * layout the region offset is kept in the slot so the element can be updated again later.
+     *
+     * @param pos element index
+     * @param value the timestamp to store, or {@code null} to mark the element as null
+     * @param precision precision of the timestamp type; decides which layout is used
+     */
+    public void setTimestamp(int pos, TimestampData value, int precision) {
+        assertIndexIsValid(pos);
+
+        if (TimestampData.isCompact(precision)) {
+            if (value == null) {
+                // Treat null like the non-compact branch does instead of dereferencing it.
+                setNullLong(pos);
+            } else {
+                setLong(pos, value.getMillisecond());
+            }
+            return;
+        }
+
+        int fieldOffset = getElementOffset(pos, 8);
+        int cursor = (int) (BinarySegmentUtils.getLong(segments, fieldOffset) >>> 32);
+        assert cursor > 0 : "invalid cursor " + cursor;
+
+        if (value == null) {
+            setNullAt(pos);
+            // zero-out the bytes
+            BinarySegmentUtils.setLong(segments, offset + cursor, 0L);
+            // keep the offset for future update
+            BinarySegmentUtils.setLong(segments, fieldOffset, ((long) cursor) << 32);
+        } else {
+            // write millisecond to the variable length portion.
+            BinarySegmentUtils.setLong(segments, offset + cursor, value.getMillisecond());
+            // write nanoOfMillisecond to the fixed-length portion.
+            setLong(pos, ((long) cursor << 32) | (long) value.getNanoOfMillisecond());
+        }
+    }
+
+ public boolean anyNull() {
+ for (int i = offset + 4; i < elementOffset; i += 4) {
+ if (BinarySegmentUtils.getInt(segments, i) != 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+    /**
+     * Guards the conversions to Java primitive arrays.
+     *
+     * @throws RuntimeException if any element of this array is null
+     */
+    private void checkNoNull() {
+        if (!anyNull()) {
+            return;
+        }
+        throw new RuntimeException("Primitive array must not contain a null value.");
+    }
+
+    /**
+     * Copies the elements into a new {@code boolean[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public boolean[] toBooleanArray() {
+        checkNoNull();
+        final boolean[] result = new boolean[size];
+        final int numBytes = size;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, BOOLEAN_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code byte[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public byte[] toByteArray() {
+        checkNoNull();
+        final byte[] result = new byte[size];
+        final int numBytes = size;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, BYTE_ARRAY_BASE_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code short[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public short[] toShortArray() {
+        checkNoNull();
+        final short[] result = new short[size];
+        final int numBytes = size * 2;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, SHORT_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code int[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public int[] toIntArray() {
+        checkNoNull();
+        final int[] result = new int[size];
+        final int numBytes = size * 4;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, INT_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code long[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public long[] toLongArray() {
+        checkNoNull();
+        final long[] result = new long[size];
+        final int numBytes = size * 8;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, LONG_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code float[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public float[] toFloatArray() {
+        checkNoNull();
+        final float[] result = new float[size];
+        final int numBytes = size * 4;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, FLOAT_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Copies the elements into a new {@code double[]}.
+     *
+     * @throws RuntimeException if any element is null
+     */
+    @Override
+    public double[] toDoubleArray() {
+        checkNoNull();
+        final double[] result = new double[size];
+        final int numBytes = size * 8;
+        BinarySegmentUtils.copyToUnsafe(
+                segments, elementOffset, result, DOUBLE_ARRAY_OFFSET, numBytes);
+        return result;
+    }
+
+    /**
+     * Converts this array into a Java object array whose component type is the internal
+     * conversion class of {@code elementType}. Null elements stay {@code null} in the result.
+     *
+     * @param elementType the type of the elements in this array
+     * @param <T> the Java class the elements are read as
+     * @return a new array holding one entry per element
+     */
+    @SuppressWarnings("unchecked")
+    public <T> T[] toObjectArray(DataType elementType) {
+        // The casts are unchecked: the caller chooses T to match elementType.
+        Class<T> elementClass = (Class<T>) DataTypeUtils.toInternalConversionClass(elementType);
+        ArrayData.ElementGetter elementGetter = ArrayData.createElementGetter(elementType);
+        T[] values = (T[]) Array.newInstance(elementClass, size);
+        for (int i = 0; i < size; i++) {
+            if (!isNullAt(i)) {
+                values[i] = (T) elementGetter.getElementOrNull(this, i);
+            }
+        }
+        return values;
+    }
+
+    /** Returns a copy of this array backed by a freshly allocated byte array. */
+    public BinaryArrayData copy() {
+        final BinaryArrayData target = new BinaryArrayData();
+        return copy(target);
+    }
+
+    /**
+     * Copies this array's bytes into a new byte array and points {@code reuse} at it.
+     *
+     * @param reuse the instance to re-point at the copied bytes
+     * @return {@code reuse}
+     */
+    public BinaryArrayData copy(BinaryArrayData reuse) {
+        final byte[] snapshot = BinarySegmentUtils.copyToBytes(segments, offset, sizeInBytes);
+        reuse.pointTo(MemorySegmentFactory.wrap(snapshot), 0, sizeInBytes);
+        return reuse;
+    }
+
+    /** Hashes the bytes of this array via {@code BinarySegmentUtils.hashByWords}. */
+    @Override
+    public int hashCode() {
+        final int hash = BinarySegmentUtils.hashByWords(segments, offset, sizeInBytes);
+        return hash;
+    }
+
+ // ------------------------------------------------------------------------------------------
+ // Construction Utilities
+ // ------------------------------------------------------------------------------------------
+
+    /** Creates a binary array from a {@code boolean[]} (1 byte per element). */
+    public static BinaryArrayData fromPrimitiveArray(boolean[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, BOOLEAN_ARRAY_OFFSET, length, 1);
+    }
+
+    /** Creates a binary array from a {@code byte[]} (1 byte per element). */
+    public static BinaryArrayData fromPrimitiveArray(byte[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, BYTE_ARRAY_BASE_OFFSET, length, Byte.BYTES);
+    }
+
+    /** Creates a binary array from a {@code short[]} (2 bytes per element). */
+    public static BinaryArrayData fromPrimitiveArray(short[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, SHORT_ARRAY_OFFSET, length, Short.BYTES);
+    }
+
+    /** Creates a binary array from an {@code int[]} (4 bytes per element). */
+    public static BinaryArrayData fromPrimitiveArray(int[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, INT_ARRAY_OFFSET, length, Integer.BYTES);
+    }
+
+    /** Creates a binary array from a {@code long[]} (8 bytes per element). */
+    public static BinaryArrayData fromPrimitiveArray(long[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, LONG_ARRAY_OFFSET, length, Long.BYTES);
+    }
+
+    /** Creates a binary array from a {@code float[]} (4 bytes per element). */
+    public static BinaryArrayData fromPrimitiveArray(float[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, FLOAT_ARRAY_OFFSET, length, Float.BYTES);
+    }
+
+    /** Creates a binary array from a {@code double[]} (8 bytes per element). */
+    public static BinaryArrayData fromPrimitiveArray(double[] arr) {
+        final int length = arr.length;
+        return fromPrimitiveArray(arr, DOUBLE_ARRAY_OFFSET, length, Double.BYTES);
+    }
+
+    /**
+     * Builds a binary array by copying the raw memory of a Java primitive array behind a freshly
+     * written header. No null bits are set.
+     *
+     * @param arr the source primitive array
+     * @param offset base offset of the first element within {@code arr}
+     * @param length number of elements in {@code arr}
+     * @param elementSize size of one element in bytes
+     * @return a binary array backed by a new byte array
+     * @throws UnsupportedOperationException if the binary form would not fit in a byte array
+     */
+    private static BinaryArrayData fromPrimitiveArray(
+            Object arr, int offset, int length, int elementSize) {
+        // Same formula as calculateHeaderInBytes, but evaluated in long so that lengths close
+        // to Integer.MAX_VALUE cannot wrap around and slip past the size check below.
+        final long headerInBytes = 4L + ((length + 31L) / 32L) * 4L;
+        // Widen before multiplying: int * int would overflow for large arrays.
+        final long valueRegionInBytes = (long) elementSize * length;
+
+        // must align by 8 bytes
+        long totalSizeInLongs = (headerInBytes + valueRegionInBytes + 7) / 8;
+        if (totalSizeInLongs > Integer.MAX_VALUE / 8) {
+            throw new UnsupportedOperationException(
+                    "Cannot convert this array to unsafe format as it's too big.");
+        }
+        long totalSize = totalSizeInLongs * 8;
+
+        final byte[] data = new byte[(int) totalSize];
+
+        // Header: the element count goes into the first 4 bytes; the null bitmap stays zeroed.
+        UNSAFE.putInt(data, (long) BYTE_ARRAY_BASE_OFFSET, length);
+        UNSAFE.copyMemory(
+                arr, offset, data, BYTE_ARRAY_BASE_OFFSET + headerInBytes, valueRegionInBytes);
+
+        BinaryArrayData result = new BinaryArrayData();
+        result.pointTo(MemorySegmentFactory.wrap(data), 0, (int) totalSize);
+        return result;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryFormat.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryFormat.java
new file mode 100644
index 00000000000..8bed6c4a530
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryFormat.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.core.memory.MemorySegment;
+
+/** Binary format spanning {@link MemorySegment}s. */
+@Internal
+public interface BinaryFormat {
+
+ /**
+ * It decides whether to put data in FixLenPart or VarLenPart. See more in {@link
+ * BinaryRecordData}.
+ *
+ * <p>If len is less than 8, its binary format is: 1-bit mark(1) = 1, 7-bits len, and 7-bytes
+ * data. Data is stored in fix-length part.
+ *
+ * <p>If len is greater or equal to 8, its binary format is: 1-bit mark(1) = 0, 31-bits offset
+ * to the data, and 4-bytes length of data. Data is stored in variable-length part.
+ */
+ int MAX_FIX_PART_DATA_SIZE = 7;
+
+ /**
+ * To get the mark in highest bit of long. Form: 10000000 00000000 ... (8 bytes)
+ *
+ * <p>This is used to decide whether the data is stored in fixed-length part or variable-length
+ * part. see {@link #MAX_FIX_PART_DATA_SIZE} for more information.
+ */
+ long HIGHEST_FIRST_BIT = 0x80L << 56;
+
+ /**
+ * To get the 7 bits length in second bit to eighth bit out of a long. Form: 01111111 00000000
+ * ... (8 bytes)
+ *
+ * <p>This is used to get the length of the data which is stored in this long. see {@link
+ * #MAX_FIX_PART_DATA_SIZE} for more information.
+ */
+ long HIGHEST_SECOND_TO_EIGHTH_BIT = 0x7FL << 56;
+
+ /** Gets the underlying {@link MemorySegment}s this binary format spans. */
+ MemorySegment[] getSegments();
+
+ /** Gets the start offset of this binary data in the {@link MemorySegment}s. */
+ int getOffset();
+
+ /** Gets the size in bytes of this binary data. */
+ int getSizeInBytes();
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryMapData.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryMapData.java
new file mode 100644
index 00000000000..98cc19d341a
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryMapData.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.cdc.common.data.MapData;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.core.memory.MemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.flink.cdc.common.utils.Preconditions.checkArgument;
+
+/**
+ * A binary implementation of {@link MapData} with the layout:
+ *
+ * <pre>
+ * [4 byte(keyArray size in bytes)] + [Key BinaryArray] + [Value BinaryArray].
+ * </pre>
+ *
+ * <p>{@code BinaryMap} are influenced by Apache Spark UnsafeMapData.
+ */
+@Internal
+public class BinaryMapData extends BinarySection implements MapData {
+    private final BinaryArrayData keys;
+    private final BinaryArrayData values;
+
+    /** Creates an unbound map; {@code pointTo} attaches it to memory. */
+    public BinaryMapData() {
+        keys = new BinaryArrayData();
+        values = new BinaryArrayData();
+    }
+
+    /** Returns the number of entries, taken from the key array. */
+    public int size() {
+        return keys.size();
+    }
+
+    /**
+     * Binds this map to the given memory region and points the key and value arrays at their
+     * respective parts of it.
+     */
+    @Override
+    public void pointTo(MemorySegment[] segments, int offset, int sizeInBytes) {
+        // Read the numBytes of key array from the first 4 bytes.
+        final int keyArrayBytes = BinarySegmentUtils.getInt(segments, offset);
+        assert keyArrayBytes >= 0 : "keyArraySize (" + keyArrayBytes + ") should >= 0";
+        // Whatever follows the size word and the key array belongs to the value array.
+        final int valueArrayBytes = sizeInBytes - keyArrayBytes - 4;
+        assert valueArrayBytes >= 0 : "valueArraySize (" + valueArrayBytes + ") should >= 0";
+
+        keys.pointTo(segments, offset + 4, keyArrayBytes);
+        values.pointTo(segments, offset + 4 + keyArrayBytes, valueArrayBytes);
+
+        assert keys.size() == values.size();
+
+        this.segments = segments;
+        this.offset = offset;
+        this.sizeInBytes = sizeInBytes;
+    }
+
+    /** Returns the array holding the keys. */
+    public BinaryArrayData keyArray() {
+        return keys;
+    }
+
+    /** Returns the array holding the values. */
+    public BinaryArrayData valueArray() {
+        return values;
+    }
+
+    /**
+     * Converts this map into a {@link HashMap}, pairing the i-th key with the i-th value.
+     *
+     * @param keyType the type of the keys
+     * @param valueType the type of the values
+     * @return a new Java map with the converted entries
+     */
+    public Map<?, ?> toJavaMap(DataType keyType, DataType valueType) {
+        Object[] keyArray = keys.toObjectArray(keyType);
+        Object[] valueArray = values.toObjectArray(valueType);
+
+        Map<Object, Object> map = new HashMap<>();
+        for (int i = 0; i < keyArray.length; i++) {
+            map.put(keyArray[i], valueArray[i]);
+        }
+        return map;
+    }
+
+    /** Returns a copy of this map backed by a freshly allocated byte array. */
+    public BinaryMapData copy() {
+        return copy(new BinaryMapData());
+    }
+
+    /**
+     * Copies this map's bytes into a new byte array and points {@code reuse} at it.
+     *
+     * @param reuse the instance to re-point at the copied bytes
+     * @return {@code reuse}
+     */
+    public BinaryMapData copy(BinaryMapData reuse) {
+        byte[] bytes = BinarySegmentUtils.copyToBytes(segments, offset, sizeInBytes);
+        reuse.pointTo(MemorySegmentFactory.wrap(bytes), 0, sizeInBytes);
+        return reuse;
+    }
+
+    /** Hashes the bytes of this map via {@code BinarySegmentUtils.hashByWords}. */
+    @Override
+    public int hashCode() {
+        return BinarySegmentUtils.hashByWords(segments, offset, sizeInBytes);
+    }
+
+    // ------------------------------------------------------------------------------------------
+    // Construction Utilities
+    // ------------------------------------------------------------------------------------------
+
+    /**
+     * Builds a map by copying a key array and a value array into one new byte array, preceded by
+     * the 4-byte size of the key array.
+     *
+     * @param key the key array; must be backed by exactly one memory segment
+     * @param value the value array; must be backed by exactly one memory segment
+     * @return a new map holding copies of both arrays
+     */
+    public static BinaryMapData valueOf(BinaryArrayData key, BinaryArrayData value) {
+        checkArgument(key.getSegments().length == 1 && value.getSegments().length == 1);
+        byte[] bytes = new byte[4 + key.sizeInBytes + value.sizeInBytes];
+        MemorySegment segment = MemorySegmentFactory.wrap(bytes);
+        segment.putInt(0, key.sizeInBytes);
+        key.getSegments()[0].copyTo(key.getOffset(), segment, 4, key.sizeInBytes);
+        value.getSegments()[0].copyTo(
+                value.getOffset(), segment, 4 + key.sizeInBytes, value.sizeInBytes);
+        BinaryMapData map = new BinaryMapData();
+        map.pointTo(segment, 0, bytes.length);
+        return map;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryRecordData.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryRecordData.java
new file mode 100644
index 00000000000..bc645eb7db4
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryRecordData.java
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.cdc.common.data.ArrayData;
+import org.apache.flink.cdc.common.data.DateData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.LocalZonedTimestampData;
+import org.apache.flink.cdc.common.data.MapData;
+import org.apache.flink.cdc.common.data.RecordData;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.data.TimeData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.data.ZonedTimestampData;
+import org.apache.flink.cdc.common.types.variant.BinaryVariant;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.core.memory.MemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+
+import java.nio.ByteOrder;
+
+/**
+ * An implementation of {@link RecordData} which is backed by {@link MemorySegment} instead of
+ * Object. It can significantly reduce the serialization/deserialization of Java objects.
+ *
+ * <p>A BinaryRecordData has two parts: a fixed-length part and a variable-length part.
+ *
+ * <p>Fixed-length part contains 1 byte header and null bit set and field values. Null bit set is
+ * used for null tracking and is aligned to 8-byte word boundaries. `Field values` holds
+ * fixed-length primitive types and variable-length values which can be stored in 8 bytes inside. If
+ * a variable-length value does not fit into the 8-byte field, the field stores the length and
+ * offset of its data in the variable-length part instead.
+ *
+ * <p>Fixed-length part will certainly fall into a MemorySegment, which will speed up the read and
+ * write of field. During the write phase, if the target memory segment has less space than fixed
+ * length part size, we will skip the space. So the number of fields in a single Row cannot exceed
+ * the capacity of a single MemorySegment, if there are too many fields, we suggest that user set a
+ * bigger pageSize of MemorySegment.
+ *
+ * <p>Variable-length part may fall into multiple MemorySegments.
+ */
+@Internal
+public final class BinaryRecordData extends BinarySection implements RecordData, NullAwareGetters {
+
+    /** Whether the platform's native byte order is little-endian. */
+    public static final boolean LITTLE_ENDIAN =
+            (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN);
+
+    /** Mask that zeroes the header byte (the first byte in memory) of the first 8-byte word. */
+    private static final long FIRST_BYTE_ZERO = LITTLE_ENDIAN ? ~0xFFL : ~(0xFFL << 56L);
+
+    /** Number of bits taken by the header that precedes the null bits. */
+    public static final int HEADER_SIZE_IN_BITS = 8;
+
+    /** Separator between the parts of a zoned timestamp that is stored as a string. */
+    public static final String TIMESTAMP_DELIMITER = "//";
+
+    /**
+     * Returns the size in bytes of the header plus null bit set for a record with {@code arity}
+     * fields, rounded up to a multiple of 8 bytes.
+     *
+     * @param arity number of fields
+     * @return size of header and null bits in bytes
+     */
+    public static int calculateBitSetWidthInBytes(int arity) {
+        return ((arity + 63 + HEADER_SIZE_IN_BITS) / 64) * 8;
+    }
+
+    private final int arity;
+    private final int nullBitsSizeInBytes;
+
+    /**
+     * Creates a record with the given number of fields. The record does not point to any memory
+     * until {@code pointTo} is called.
+     *
+     * @param arity number of fields, must not be negative
+     */
+    public BinaryRecordData(int arity) {
+        Preconditions.checkArgument(arity >= 0);
+        this.arity = arity;
+        this.nullBitsSizeInBytes = calculateBitSetWidthInBytes(arity);
+    }
+
+    /** Returns the position in {@code segments[0]} of the 8-byte slot of field {@code pos}. */
+    private int getFieldOffset(int pos) {
+        return offset + nullBitsSizeInBytes + pos * 8;
+    }
+
+    /** Checks {@code 0 <= index < arity}; only active when JVM assertions are enabled. */
+    private void assertIndexIsValid(int index) {
+        assert index >= 0 : "index (" + index + ") should >= 0";
+        assert index < arity : "index (" + index + ") should < " + arity;
+    }
+
+    /** Returns the size in bytes of the null bits plus one 8-byte slot per field. */
+    public int getFixedLengthPartSize() {
+        return nullBitsSizeInBytes + 8 * arity;
+    }
+
+    @Override
+    public int getArity() {
+        return arity;
+    }
+
+    /** Returns whether the null bit of field {@code pos} is set. */
+    @Override
+    public boolean isNullAt(int pos) {
+        assertIndexIsValid(pos);
+        return BinarySegmentUtils.bitGet(segments[0], offset, pos + HEADER_SIZE_IN_BITS);
+    }
+
+    @Override
+    public boolean getBoolean(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getBoolean(getFieldOffset(pos));
+    }
+
+    @Override
+    public byte getByte(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].get(getFieldOffset(pos));
+    }
+
+    @Override
+    public short getShort(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getShort(getFieldOffset(pos));
+    }
+
+    @Override
+    public int getInt(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getInt(getFieldOffset(pos));
+    }
+
+    @Override
+    public long getLong(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getLong(getFieldOffset(pos));
+    }
+
+    @Override
+    public float getFloat(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getFloat(getFieldOffset(pos));
+    }
+
+    @Override
+    public double getDouble(int pos) {
+        assertIndexIsValid(pos);
+        return segments[0].getDouble(getFieldOffset(pos));
+    }
+
+    /** Reads the 8-byte slot of field {@code pos} and decodes the string it describes. */
+    @Override
+    public StringData getString(int pos) {
+        assertIndexIsValid(pos);
+        int fieldOffset = getFieldOffset(pos);
+        final long offsetAndLen = segments[0].getLong(fieldOffset);
+        return BinarySegmentUtils.readStringData(segments, offset, fieldOffset, offsetAndLen);
+    }
+
+    /**
+     * Reads a decimal. When {@code precision} is compact, the slot itself holds the unscaled
+     * value as a long; otherwise the slot is passed on to {@code readDecimalData} as offset and
+     * size.
+     */
+    @Override
+    public DecimalData getDecimal(int pos, int precision, int scale) {
+        assertIndexIsValid(pos);
+
+        final long slot = segments[0].getLong(getFieldOffset(pos));
+        if (DecimalData.isCompact(precision)) {
+            return DecimalData.fromUnscaledLong(slot, precision, scale);
+        }
+        return BinarySegmentUtils.readDecimalData(segments, offset, slot, precision, scale);
+    }
+
+    /** Reads a timestamp; {@code precision} is not used by this implementation. */
+    @Override
+    public TimestampData getTimestamp(int pos, int precision) {
+        assertIndexIsValid(pos);
+
+        int fieldOffset = getFieldOffset(pos);
+        final long offsetAndNanoOfMilli = segments[0].getLong(fieldOffset);
+        return BinarySegmentUtils.readTimestampData(segments, offset, offsetAndNanoOfMilli);
+    }
+
+    /**
+     * Reads a zoned timestamp that is stored as a string of three parts separated by {@link
+     * #TIMESTAMP_DELIMITER}: a long, an int, and a string, handed to {@code
+     * ZonedTimestampData.of} in that order. {@code precision} is not used.
+     */
+    @Override
+    public ZonedTimestampData getZonedTimestamp(int pos, int precision) {
+        String[] parts = getString(pos).toString().split(TIMESTAMP_DELIMITER);
+        return ZonedTimestampData.of(
+                Long.parseLong(parts[0]), Integer.parseInt(parts[1]), parts[2]);
+    }
+
+    /** Reads a local zoned timestamp; {@code precision} is not used by this implementation. */
+    @Override
+    public LocalZonedTimestampData getLocalZonedTimestampData(int pos, int precision) {
+        assertIndexIsValid(pos);
+
+        int fieldOffset = getFieldOffset(pos);
+        final long offsetAndNanoOfMilli = segments[0].getLong(fieldOffset);
+        return BinarySegmentUtils.readLocalZonedTimestampData(
+                segments, offset, offsetAndNanoOfMilli);
+    }
+
+    /** Reads the 8-byte slot of field {@code pos} and decodes the byte array it describes. */
+    @Override
+    public byte[] getBinary(int pos) {
+        assertIndexIsValid(pos);
+        int fieldOffset = getFieldOffset(pos);
+        final long offsetAndLen = segments[0].getLong(fieldOffset);
+        return BinarySegmentUtils.readBinary(segments, offset, fieldOffset, offsetAndLen);
+    }
+
+    @Override
+    public ArrayData getArray(int pos) {
+        assertIndexIsValid(pos);
+        return BinarySegmentUtils.readArrayData(segments, offset, getLong(pos));
+    }
+
+    @Override
+    public MapData getMap(int pos) {
+        assertIndexIsValid(pos);
+        return BinarySegmentUtils.readMapData(segments, offset, getLong(pos));
+    }
+
+    @Override
+    public RecordData getRow(int pos, int numFields) {
+        assertIndexIsValid(pos);
+        return BinarySegmentUtils.readRecordData(segments, numFields, offset, getLong(pos));
+    }
+
+    /** Reads the field as an int and converts it with {@code DateData.fromEpochDay}. */
+    @Override
+    public DateData getDate(int pos) {
+        assertIndexIsValid(pos);
+        return DateData.fromEpochDay(getInt(pos));
+    }
+
+    /** Reads the field as an int and converts it with {@code TimeData.fromMillisOfDay}. */
+    @Override
+    public TimeData getTime(int pos) {
+        assertIndexIsValid(pos);
+        return TimeData.fromMillisOfDay(getInt(pos));
+    }
+
+    @Override
+    public BinaryVariant getVariant(int pos) {
+        assertIndexIsValid(pos);
+        return BinarySegmentUtils.readVariant(segments, offset, getLong(pos));
+    }
+
+    /**
+     * Returns whether any null bit of this record is set. The bit is 1 when the field is null.
+     * Default is 0.
+     *
+     * <p>The null words are read relative to {@code offset}, the same base that {@link
+     * #isNullAt(int)} uses, so the result is also correct for a record that does not start at
+     * position 0 of its first segment.
+     */
+    @Override
+    public boolean anyNull() {
+        // The first word also holds the header byte, which must not count as a null bit.
+        if ((segments[0].getLong(offset) & FIRST_BYTE_ZERO) != 0) {
+            return true;
+        }
+        for (int i = 8; i < nullBitsSizeInBytes; i += 8) {
+            if (segments[0].getLong(offset + i) != 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns whether at least one of the given field positions is null. */
+    @Override
+    public boolean anyNull(int[] fields) {
+        for (int field : fields) {
+            if (isNullAt(field)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns a copy of this record backed by a newly allocated byte array. */
+    public BinaryRecordData copy() {
+        return copy(new BinaryRecordData(arity));
+    }
+
+    /**
+     * Copies the bytes of this record into a new byte array and points {@code reuse} at it.
+     *
+     * @param reuse instance that is re-pointed at the copied bytes
+     * @return {@code reuse}
+     */
+    public BinaryRecordData copy(BinaryRecordData reuse) {
+        return copyInternal(reuse);
+    }
+
+    private BinaryRecordData copyInternal(BinaryRecordData reuse) {
+        byte[] bytes = BinarySegmentUtils.copyToBytes(segments, offset, sizeInBytes);
+        reuse.pointTo(MemorySegmentFactory.wrap(bytes), 0, sizeInBytes);
+        return reuse;
+    }
+
+    /** Detaches this record from its memory: drops the segments and resets offset and size. */
+    public void clear() {
+        segments = null;
+        offset = 0;
+        sizeInBytes = 0;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        // Only another BinaryRecordData can be equal; equality is decided on the raw bytes.
+        if (!(o instanceof BinaryRecordData)) {
+            return false;
+        }
+        final BinarySection that = (BinarySection) o;
+        return sizeInBytes == that.sizeInBytes
+                && BinarySegmentUtils.equals(
+                        segments, offset, that.segments, that.offset, sizeInBytes);
+    }
+
+    @Override
+    public int hashCode() {
+        return BinarySegmentUtils.hashByWords(segments, offset, sizeInBytes);
+    }
+
+    /** Overwrites the recorded total size of this record in bytes. */
+    public void setTotalSize(int sizeInBytes) {
+        this.sizeInBytes = sizeInBytes;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySection.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySection.java
new file mode 100644
index 00000000000..799c91a4f96
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySection.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.core.memory.MemorySegment;
+
+/** Base {@link BinaryFormat} implementation that describes a region of memory segments. */
+@Internal
+public class BinarySection implements BinaryFormat {
+
+    protected MemorySegment[] segments;
+    protected int offset;
+    protected int sizeInBytes;
+
+    /** Creates a section that does not point to any memory yet. */
+    public BinarySection() {}
+
+    /**
+     * Creates a section over the given segments.
+     *
+     * @param segments segments holding the data, must not be null
+     * @param offset start position of the section within the segments
+     * @param sizeInBytes length of the section in bytes
+     */
+    public BinarySection(MemorySegment[] segments, int offset, int sizeInBytes) {
+        Preconditions.checkArgument(segments != null);
+        this.sizeInBytes = sizeInBytes;
+        this.offset = offset;
+        this.segments = segments;
+    }
+
+    /** Points this section at a region of a single segment. */
+    public final void pointTo(MemorySegment segment, int offset, int sizeInBytes) {
+        final MemorySegment[] single = new MemorySegment[] {segment};
+        pointTo(single, offset, sizeInBytes);
+    }
+
+    /**
+     * Points this section at a region of the given segments.
+     *
+     * @param segments segments holding the data, must not be null
+     * @param offset start position of the section within the segments
+     * @param sizeInBytes length of the section in bytes
+     */
+    public void pointTo(MemorySegment[] segments, int offset, int sizeInBytes) {
+        Preconditions.checkArgument(segments != null);
+        this.sizeInBytes = sizeInBytes;
+        this.offset = offset;
+        this.segments = segments;
+    }
+
+    public MemorySegment[] getSegments() {
+        return segments;
+    }
+
+    public int getOffset() {
+        return offset;
+    }
+
+    public int getSizeInBytes() {
+        return sizeInBytes;
+    }
+
+    /** Two sections are equal when they have the same class, the same size and equal bytes. */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null) {
+            return false;
+        }
+        if (o.getClass() != getClass()) {
+            return false;
+        }
+        final BinarySection other = (BinarySection) o;
+        if (sizeInBytes != other.sizeInBytes) {
+            return false;
+        }
+        return BinarySegmentUtils.equals(
+                segments, offset, other.segments, other.offset, sizeInBytes);
+    }
+
+    @Override
+    public int hashCode() {
+        return BinarySegmentUtils.hash(segments, offset, sizeInBytes);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySegmentUtils.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySegmentUtils.java
new file mode 100644
index 00000000000..9c463106df0
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinarySegmentUtils.java
@@ -0,0 +1,1198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.cdc.common.data.ArrayData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.LocalZonedTimestampData;
+import org.apache.flink.cdc.common.data.MapData;
+import org.apache.flink.cdc.common.data.RecordData;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.types.variant.BinaryVariant;
+import org.apache.flink.core.memory.DataOutputView;
+import org.apache.flink.core.memory.MemorySegment;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import static org.apache.flink.cdc.common.data.binary.BinaryFormat.HIGHEST_FIRST_BIT;
+import static org.apache.flink.cdc.common.data.binary.BinaryFormat.HIGHEST_SECOND_TO_EIGHTH_BIT;
+import static org.apache.flink.core.memory.MemoryUtils.UNSAFE;
+
+/** Utilities for binary data segments which heavily uses {@link MemorySegment}. */
+@Internal
+public final class BinarySegmentUtils {
+
+    /** Constant that flags the byte order: true when the native order is little-endian. */
+    public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+    /** Shift that turns a bit index into the index of the byte containing it (divide by 8). */
+    private static final int ADDRESS_BITS_PER_WORD = 3;
+
+    /** Mask that extracts the position of a bit within its byte (modulo 8). */
+    private static final int BIT_BYTE_INDEX_MASK = 7;
+
+    /**
+     * Size of the reusable per-thread byte buffer. The number of SQL execution threads is limited,
+     * so an overhead of 64K per thread is acceptable.
+     */
+    private static final int MAX_BYTES_LENGTH = 1024 * 64;
+
+    /** Size, in chars, of the reusable per-thread char buffer. */
+    private static final int MAX_CHARS_LENGTH = 1024 * 32;
+
+    /** Base offset of the first element of a {@code byte[]}, as reported by {@code UNSAFE}. */
+    private static final int BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
+
+    /** Per-thread buffer handed out by {@code allocateReuseBytes}. */
+    private static final ThreadLocal<byte[]> BYTES_LOCAL = new ThreadLocal<>();
+
+    /** Per-thread buffer handed out by {@code allocateReuseChars}. */
+    private static final ThreadLocal<char[]> CHARS_LOCAL = new ThreadLocal<>();
+
+ /** Private constructor: this utility class is not meant to be instantiated. */
+ private BinarySegmentUtils() {
+ // do not instantiate
+ }
+
+    /**
+     * Hands out a byte array of at least {@code length} bytes for temporary use only; the caller
+     * must not keep a reference to it. A {@link ThreadLocal} buffer is reused where possible to
+     * avoid the cost of allocating and collecting byte arrays.
+     *
+     * <p>This is useful for methods that accept only a byte[] rather than MemorySegment[]: copy
+     * the segment data into the reusable array and call the method, e.g. for String
+     * deserialization.
+     *
+     * <p>The returned array may be longer than {@code length}. A request that the cached buffer
+     * cannot satisfy gets a fresh array that is not cached.
+     *
+     * @param length minimum number of bytes required
+     * @return a byte array with at least {@code length} bytes
+     */
+    public static byte[] allocateReuseBytes(int length) {
+        final byte[] cached = BYTES_LOCAL.get();
+        if (cached != null) {
+            // Reuse the cached buffer when it is big enough, otherwise allocate a one-off array.
+            return cached.length >= length ? cached : new byte[length];
+        }
+        if (length > MAX_BYTES_LENGTH) {
+            // Too big for the shared buffer: one-off array, nothing is cached.
+            return new byte[length];
+        }
+        final byte[] fresh = new byte[MAX_BYTES_LENGTH];
+        BYTES_LOCAL.set(fresh);
+        return fresh;
+    }
+
+    /**
+     * Hands out a char array of at least {@code length} chars for temporary use, reusing a
+     * per-thread buffer where possible.
+     *
+     * <p>The returned array may be longer than {@code length}. A request that the cached buffer
+     * cannot satisfy gets a fresh array that is not cached.
+     *
+     * @param length minimum number of chars required
+     * @return a char array with at least {@code length} chars
+     */
+    public static char[] allocateReuseChars(int length) {
+        final char[] cached = CHARS_LOCAL.get();
+        if (cached != null) {
+            return cached.length >= length ? cached : new char[length];
+        }
+        if (length > MAX_CHARS_LENGTH) {
+            return new char[length];
+        }
+        final char[] fresh = new char[MAX_CHARS_LENGTH];
+        CHARS_LOCAL.set(fresh);
+        return fresh;
+    }
+
+    /**
+     * Copies a region of the segments into a newly allocated byte[].
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param numBytes number of bytes to copy
+     * @return a new array of {@code numBytes} bytes holding the copied data
+     */
+    public static byte[] copyToBytes(MemorySegment[] segments, int offset, int numBytes) {
+        final byte[] target = new byte[numBytes];
+        return copyToBytes(segments, offset, target, 0, numBytes);
+    }
+
+    /**
+     * Copies a region of the segments into the given byte[].
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param bytes target array
+     * @param bytesOffset start position within the target array
+     * @param numBytes number of bytes to copy
+     * @return {@code bytes}
+     */
+    public static byte[] copyToBytes(
+            MemorySegment[] segments, int offset, byte[] bytes, int bytesOffset, int numBytes) {
+        if (!inFirstSegment(segments, offset, numBytes)) {
+            copyMultiSegmentsToBytes(segments, offset, bytes, bytesOffset, numBytes);
+            return bytes;
+        }
+        // Fast path: the whole region lies in the first segment.
+        segments[0].get(offset, bytes, bytesOffset, numBytes);
+        return bytes;
+    }
+
+    /**
+     * Copies a region that may span several segments into the given byte[], walking the segments
+     * in order. Returns silently if the segments run out before {@code numBytes} were copied.
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param bytes target array
+     * @param bytesOffset start position within the target array
+     * @param numBytes number of bytes to copy
+     */
+    public static void copyMultiSegmentsToBytes(
+            MemorySegment[] segments, int offset, byte[] bytes, int bytesOffset, int numBytes) {
+        int copied = 0;
+        int position = offset;
+        for (MemorySegment segment : segments) {
+            final int available = segment.size() - position;
+            if (available <= 0) {
+                // The start lies beyond this segment: skip it and shift the position accordingly.
+                position = -available;
+                continue;
+            }
+            final int chunk = Math.min(available, numBytes - copied);
+            segment.get(position, bytes, bytesOffset + copied, chunk);
+            copied += chunk;
+            // Every following segment is read from its beginning.
+            position = 0;
+            if (copied == numBytes) {
+                return;
+            }
+        }
+    }
+
+    /**
+     * Copies a region of the segments to a target unsafe pointer.
+     *
+     * @param segments source segments
+     * @param offset position in the segments at which reading starts
+     * @param target the unsafe memory to copy the bytes to
+     * @param pointer position in the target unsafe memory to copy the bytes to
+     * @param numBytes number of bytes to copy
+     */
+    public static void copyToUnsafe(
+            MemorySegment[] segments, int offset, Object target, int pointer, int numBytes) {
+        if (!inFirstSegment(segments, offset, numBytes)) {
+            copyMultiSegmentsToUnsafe(segments, offset, target, pointer, numBytes);
+            return;
+        }
+        // Fast path: the whole region lies in the first segment.
+        segments[0].copyToUnsafe(offset, target, pointer, numBytes);
+    }
+
+ private static void copyMultiSegmentsToUnsafe(
+ MemorySegment[] segments, int offset, Object target, int pointer, int numBytes) {
+ int remainSize = numBytes;
+ for (MemorySegment segment : segments) {
+ int remain = segment.size() - offset;
+ if (remain > 0) {
+ int nCopy = Math.min(remain, remainSize);
+ segment.copyToUnsafe(offset, target, numBytes - remainSize + pointer, nCopy);
+ remainSize -= nCopy;
+ // next new segment.
+ offset = 0;
+ if (remainSize == 0) {
+ return;
+ }
+ } else {
+ // remain is negative, let's advance to next segment
+ // now the offset = offset - segmentSize (-remain)
+ offset = -remain;
+ }
+ }
+ }
+
+    /**
+     * Writes a region of the segments to an output view.
+     *
+     * <p>Note: only the data itself is written, not its length.
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param sizeInBytes number of bytes to write
+     * @param target output view that receives the bytes
+     * @throws IOException if writing to {@code target} fails
+     */
+    public static void copyToView(
+            MemorySegment[] segments, int offset, int sizeInBytes, DataOutputView target)
+            throws IOException {
+        int remaining = sizeInBytes;
+        int position = offset;
+        for (MemorySegment sourceSegment : segments) {
+            final int available = sourceSegment.size() - position;
+            if (available > 0) {
+                final int copySize = Math.min(available, remaining);
+
+                // Stage the chunk in the reusable buffer, then hand it to the view.
+                final byte[] buffer = allocateReuseBytes(copySize);
+                sourceSegment.get(position, buffer, 0, copySize);
+                target.write(buffer, 0, copySize);
+
+                remaining -= copySize;
+                position = 0;
+            } else {
+                // The start lies beyond this segment: skip it.
+                position -= sourceSegment.size();
+            }
+
+            if (remaining == 0) {
+                return;
+            }
+        }
+
+        if (remaining != 0) {
+            throw new RuntimeException(
+                    "No copy finished, this should be a bug, "
+                            + "The remaining length is: "
+                            + remaining);
+        }
+    }
+
+    /**
+     * Copies bytes from a byte[] into the target segments.
+     *
+     * @param segments target segments
+     * @param offset start position within the target segments
+     * @param bytes source array
+     * @param bytesOffset start position within the source array
+     * @param numBytes number of bytes to copy
+     */
+    public static void copyFromBytes(
+            MemorySegment[] segments, int offset, byte[] bytes, int bytesOffset, int numBytes) {
+        if (segments.length != 1) {
+            copyMultiSegmentsFromBytes(segments, offset, bytes, bytesOffset, numBytes);
+            return;
+        }
+        segments[0].put(offset, bytes, bytesOffset, numBytes);
+    }
+
+ private static void copyMultiSegmentsFromBytes(
+ MemorySegment[] segments, int offset, byte[] bytes, int bytesOffset, int numBytes) {
+ int remainSize = numBytes;
+ for (MemorySegment segment : segments) {
+ int remain = segment.size() - offset;
+ if (remain > 0) {
+ int nCopy = Math.min(remain, remainSize);
+ segment.put(offset, bytes, numBytes - remainSize + bytesOffset, nCopy);
+ remainSize -= nCopy;
+ // next new segment.
+ offset = 0;
+ if (remainSize == 0) {
+ return;
+ }
+ } else {
+ // remain is negative, let's advance to next segment
+ // now the offset = offset - segmentSize (-remain)
+ offset = -remain;
+ }
+ }
+ }
+
+    /**
+     * Returns the bytes of a region of the segments. The result is not necessarily a copy: when
+     * there is a single segment whose heap array is exactly the requested region, that array
+     * itself is returned. Use {@code copyToBytes} when a copy is required.
+     *
+     * @param segments source segments
+     * @param baseOffset start position within the source segments
+     * @param sizeInBytes number of bytes to return
+     * @return the segment's own heap array or a new array holding the region
+     */
+    public static byte[] getBytes(MemorySegment[] segments, int baseOffset, int sizeInBytes) {
+        if (segments.length != 1) {
+            final byte[] gathered = new byte[sizeInBytes];
+            copyMultiSegmentsToBytes(segments, baseOffset, gathered, 0, sizeInBytes);
+            return gathered;
+        }
+
+        final byte[] heapMemory = segments[0].getHeapMemory();
+        final boolean coversWholeArray =
+                baseOffset == 0 && heapMemory != null && heapMemory.length == sizeInBytes;
+        if (coversWholeArray) {
+            // Avoid the copy: the backing array is exactly the requested region.
+            return heapMemory;
+        }
+
+        final byte[] copied = new byte[sizeInBytes];
+        segments[0].get(baseOffset, copied, 0, sizeInBytes);
+        return copied;
+    }
+
+    /**
+     * Compares two regions of memory segments for equality.
+     *
+     * @param segments1 first segments
+     * @param offset1 start position of the region in {@code segments1}
+     * @param segments2 second segments
+     * @param offset2 start position of the region in {@code segments2}
+     * @param len length of the compared regions in bytes
+     * @return true if the regions are equal, false otherwise
+     */
+    public static boolean equals(
+            MemorySegment[] segments1,
+            int offset1,
+            MemorySegment[] segments2,
+            int offset2,
+            int len) {
+        final boolean bothInFirstSegment =
+                inFirstSegment(segments1, offset1, len) && inFirstSegment(segments2, offset2, len);
+        return bothInFirstSegment
+                ? segments1[0].equalTo(segments2[0], offset1, offset2, len)
+                : equalsMultiSegments(segments1, offset1, segments2, offset2, len);
+    }
+
+    /**
+     * Compares two regions that may each span several segments. The regions are compared chunk by
+     * chunk, each chunk ending where either side reaches the end of its current segment.
+     *
+     * <p>The size of the first segment of each side is used to locate the start position, i.e.
+     * all segments of one side are treated as having that size.
+     *
+     * @param segments1 first segments
+     * @param offset1 start position of the region in {@code segments1}
+     * @param segments2 second segments
+     * @param offset2 start position of the region in {@code segments2}
+     * @param len length of the compared regions in bytes
+     * @return true if the regions are equal, false otherwise
+     */
+    public static boolean equalsMultiSegments(
+            MemorySegment[] segments1,
+            int offset1,
+            MemorySegment[] segments2,
+            int offset2,
+            int len) {
+        if (len == 0) {
+            // Nothing to compare; this also avoids dividing by a segment size of zero.
+            return true;
+        }
+
+        final int size1 = segments1[0].size();
+        final int size2 = segments2[0].size();
+
+        // Locate the segment and the position inside it where each region starts.
+        int index1 = offset1 / size1;
+        int index2 = offset2 / size2;
+        int pos1 = offset1 % size1;
+        int pos2 = offset2 % size2;
+
+        int remaining = len;
+        while (remaining > 0) {
+            final int chunk = Math.min(Math.min(remaining, size1 - pos1), size2 - pos2);
+            if (!segments1[index1].equalTo(segments2[index2], pos1, pos2, chunk)) {
+                return false;
+            }
+            remaining -= chunk;
+
+            pos1 += chunk;
+            if (pos1 == size1) {
+                pos1 = 0;
+                index1++;
+            }
+
+            pos2 += chunk;
+            if (pos2 == size2) {
+                pos2 = 0;
+                index2++;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Hashes a region of the segments to an int; {@code numBytes} must be aligned to 4 bytes.
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param numBytes number of bytes to hash
+     * @return the hash of the region
+     */
+    public static int hashByWords(MemorySegment[] segments, int offset, int numBytes) {
+        return inFirstSegment(segments, offset, numBytes)
+                ? MurmurHashUtils.hashBytesByWords(segments[0], offset, numBytes)
+                : hashMultiSegByWords(segments, offset, numBytes);
+    }
+
+ private static int hashMultiSegByWords(MemorySegment[] segments, int offset, int numBytes) {
+ byte[] bytes = allocateReuseBytes(numBytes);
+ copyMultiSegmentsToBytes(segments, offset, bytes, 0, numBytes);
+ return MurmurHashUtils.hashUnsafeBytesByWords(bytes, BYTE_ARRAY_BASE_OFFSET, numBytes);
+ }
+
+    /**
+     * Hashes a region of the segments to an int.
+     *
+     * @param segments source segments
+     * @param offset start position within the source segments
+     * @param numBytes number of bytes to hash
+     * @return the hash of the region
+     */
+    public static int hash(MemorySegment[] segments, int offset, int numBytes) {
+        return inFirstSegment(segments, offset, numBytes)
+                ? MurmurHashUtils.hashBytes(segments[0], offset, numBytes)
+                : hashMultiSeg(segments, offset, numBytes);
+    }
+
+ private static int hashMultiSeg(MemorySegment[] segments, int offset, int numBytes) {
+ byte[] bytes = allocateReuseBytes(numBytes);
+ copyMultiSegmentsToBytes(segments, offset, bytes, 0, numBytes);
+ return MurmurHashUtils.hashUnsafeBytes(bytes, BYTE_ARRAY_BASE_OFFSET, numBytes);
+ }
+
+ /** Is it just in first MemorySegment, we use quick way to do something. */
+ private static boolean inFirstSegment(MemorySegment[] segments, int offset, int numBytes) {
+ return numBytes + offset <= segments[0].size();
+ }
+
+ /**
+ * Given a bit index, return the byte index containing it.
+ *
+ * @param bitIndex the bit index.
+ * @return the byte index.
+ */
+ private static int byteIndex(int bitIndex) {
+ // Unsigned shift by ADDRESS_BITS_PER_WORD (3) divides the bit index by 8 bits per byte.
+ return bitIndex >>> ADDRESS_BITS_PER_WORD;
+ }
+
+    /**
+     * Clears a bit in a segment.
+     *
+     * @param segment target segment
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     */
+    public static void bitUnSet(MemorySegment segment, int baseOffset, int index) {
+        final int byteOffset = baseOffset + byteIndex(index);
+        final int mask = 1 << (index & BIT_BYTE_INDEX_MASK);
+        segment.put(byteOffset, (byte) (segment.get(byteOffset) & ~mask));
+    }
+
+    /**
+     * Sets a bit in a segment.
+     *
+     * @param segment target segment
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     */
+    public static void bitSet(MemorySegment segment, int baseOffset, int index) {
+        final int byteOffset = baseOffset + byteIndex(index);
+        final int mask = 1 << (index & BIT_BYTE_INDEX_MASK);
+        segment.put(byteOffset, (byte) (segment.get(byteOffset) | mask));
+    }
+
+    /**
+     * Reads a bit from a segment.
+     *
+     * @param segment source segment
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     * @return true if the bit is set
+     */
+    public static boolean bitGet(MemorySegment segment, int baseOffset, int index) {
+        final int byteOffset = baseOffset + byteIndex(index);
+        final int mask = 1 << (index & BIT_BYTE_INDEX_MASK);
+        return (segment.get(byteOffset) & mask) != 0;
+    }
+
+    /**
+     * Clears a bit in a bit set that is stored in the given segments.
+     *
+     * @param segments target segments
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     */
+    public static void bitUnSet(MemorySegment[] segments, int baseOffset, int index) {
+        if (segments.length != 1) {
+            bitUnSetMultiSegments(segments, baseOffset, index);
+            return;
+        }
+        // Single segment: same steps as the single-segment overload.
+        bitUnSet(segments[0], baseOffset, index);
+    }
+
+ private static void bitUnSetMultiSegments(MemorySegment[] segments, int baseOffset, int index) {
+ int offset = baseOffset + byteIndex(index);
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ MemorySegment segment = segments[segIndex];
+
+ byte current = segment.get(segOffset);
+ current &= ~(1 << (index & BIT_BYTE_INDEX_MASK));
+ segment.put(segOffset, current);
+ }
+
+    /**
+     * Sets a bit in a bit set that is stored in the given segments.
+     *
+     * @param segments target segments
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     */
+    public static void bitSet(MemorySegment[] segments, int baseOffset, int index) {
+        if (segments.length != 1) {
+            bitSetMultiSegments(segments, baseOffset, index);
+            return;
+        }
+        // Single segment: same steps as the single-segment overload.
+        bitSet(segments[0], baseOffset, index);
+    }
+
+ private static void bitSetMultiSegments(MemorySegment[] segments, int baseOffset, int index) {
+ int offset = baseOffset + byteIndex(index);
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ MemorySegment segment = segments[segIndex];
+
+ byte current = segment.get(segOffset);
+ current |= (1 << (index & BIT_BYTE_INDEX_MASK));
+ segment.put(segOffset, current);
+ }
+
+    /**
+     * Reads a bit from a bit set that is stored in the given segments.
+     *
+     * @param segments source segments
+     * @param baseOffset position of the first byte of the bit set
+     * @param index bit index, counted from {@code baseOffset}
+     * @return true if the bit is set
+     */
+    public static boolean bitGet(MemorySegment[] segments, int baseOffset, int index) {
+        final int byteOffset = baseOffset + byteIndex(index);
+        final int mask = 1 << (index & BIT_BYTE_INDEX_MASK);
+        return (getByte(segments, byteOffset) & mask) != 0;
+    }
+
+    /**
+     * Reads a boolean from the segments.
+     *
+     * @param segments source segments
+     * @param offset position of the value
+     * @return the boolean stored at {@code offset}
+     */
+    public static boolean getBoolean(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 1)
+                ? segments[0].getBoolean(offset)
+                : getBooleanMultiSegments(segments, offset);
+    }
+
+ private static boolean getBooleanMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ return segments[segIndex].getBoolean(segOffset);
+ }
+
+    /**
+     * Writes a boolean into the segments.
+     *
+     * @param segments target segments
+     * @param offset position of the value
+     * @param value the boolean to write
+     */
+    public static void setBoolean(MemorySegment[] segments, int offset, boolean value) {
+        if (!inFirstSegment(segments, offset, 1)) {
+            setBooleanMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putBoolean(offset, value);
+    }
+
+ private static void setBooleanMultiSegments(
+ MemorySegment[] segments, int offset, boolean value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ segments[segIndex].putBoolean(segOffset, value);
+ }
+
+    /**
+     * Reads a byte from the segments.
+     *
+     * @param segments source segments
+     * @param offset position of the value
+     * @return the byte stored at {@code offset}
+     */
+    public static byte getByte(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 1)
+                ? segments[0].get(offset)
+                : getByteMultiSegments(segments, offset);
+    }
+
+ private static byte getByteMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ return segments[segIndex].get(segOffset);
+ }
+
+    /**
+     * Writes a byte into the segments.
+     *
+     * @param segments target segments
+     * @param offset position of the value
+     * @param value the byte to write
+     */
+    public static void setByte(MemorySegment[] segments, int offset, byte value) {
+        if (!inFirstSegment(segments, offset, 1)) {
+            setByteMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].put(offset, value);
+    }
+
+ private static void setByteMultiSegments(MemorySegment[] segments, int offset, byte value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+ segments[segIndex].put(segOffset, value);
+ }
+
+    /**
+     * Reads a 4-byte int from a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @return the int stored at {@code offset}.
+     */
+    public static int getInt(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 4)
+                ? segments[0].getInt(offset)
+                : getIntMultiSegments(segments, offset);
+    }
+
+ private static int getIntMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 3) {
+ return segments[segIndex].getInt(segOffset);
+ } else {
+ return getIntSlowly(segments, segSize, segIndex, segOffset);
+ }
+ }
+
+ private static int getIntSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset) {
+ MemorySegment segment = segments[segNum];
+ int ret = 0;
+ for (int i = 0; i < 4; i++) {
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ int unsignedByte = segment.get(segOffset) & 0xff;
+ if (LITTLE_ENDIAN) {
+ ret |= (unsignedByte << (i * 8));
+ } else {
+ ret |= (unsignedByte << ((3 - i) * 8));
+ }
+ segOffset++;
+ }
+ return ret;
+ }
+
+    /**
+     * Writes a 4-byte int into a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @param value the int to store.
+     */
+    public static void setInt(MemorySegment[] segments, int offset, int value) {
+        if (!inFirstSegment(segments, offset, 4)) {
+            setIntMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putInt(offset, value);
+    }
+
+ private static void setIntMultiSegments(MemorySegment[] segments, int offset, int value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 3) {
+ segments[segIndex].putInt(segOffset, value);
+ } else {
+ setIntSlowly(segments, segSize, segIndex, segOffset, value);
+ }
+ }
+
+ private static void setIntSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset, int value) {
+ MemorySegment segment = segments[segNum];
+ for (int i = 0; i < 4; i++) {
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ int unsignedByte;
+ if (LITTLE_ENDIAN) {
+ unsignedByte = value >> (i * 8);
+ } else {
+ unsignedByte = value >> ((3 - i) * 8);
+ }
+ segment.put(segOffset, (byte) unsignedByte);
+ segOffset++;
+ }
+ }
+
+    /**
+     * Reads an 8-byte long from a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @return the long stored at {@code offset}.
+     */
+    public static long getLong(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 8)
+                ? segments[0].getLong(offset)
+                : getLongMultiSegments(segments, offset);
+    }
+
+ private static long getLongMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 7) {
+ return segments[segIndex].getLong(segOffset);
+ } else {
+ return getLongSlowly(segments, segSize, segIndex, segOffset);
+ }
+ }
+
+ private static long getLongSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset) {
+ MemorySegment segment = segments[segNum];
+ long ret = 0;
+ for (int i = 0; i < 8; i++) {
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ long unsignedByte = segment.get(segOffset) & 0xff;
+ if (LITTLE_ENDIAN) {
+ ret |= (unsignedByte << (i * 8));
+ } else {
+ ret |= (unsignedByte << ((7 - i) * 8));
+ }
+ segOffset++;
+ }
+ return ret;
+ }
+
+    /**
+     * Writes an 8-byte long into a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @param value the long to store.
+     */
+    public static void setLong(MemorySegment[] segments, int offset, long value) {
+        if (!inFirstSegment(segments, offset, 8)) {
+            setLongMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putLong(offset, value);
+    }
+
+ private static void setLongMultiSegments(MemorySegment[] segments, int offset, long value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 7) {
+ segments[segIndex].putLong(segOffset, value);
+ } else {
+ setLongSlowly(segments, segSize, segIndex, segOffset, value);
+ }
+ }
+
+ private static void setLongSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset, long value) {
+ MemorySegment segment = segments[segNum];
+ for (int i = 0; i < 8; i++) {
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ long unsignedByte;
+ if (LITTLE_ENDIAN) {
+ unsignedByte = value >> (i * 8);
+ } else {
+ unsignedByte = value >> ((7 - i) * 8);
+ }
+ segment.put(segOffset, (byte) unsignedByte);
+ segOffset++;
+ }
+ }
+
+    /**
+     * Reads a 2-byte short from a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @return the short stored at {@code offset}.
+     */
+    public static short getShort(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 2)
+                ? segments[0].getShort(offset)
+                : getShortMultiSegments(segments, offset);
+    }
+
+ private static short getShortMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 1) {
+ return segments[segIndex].getShort(segOffset);
+ } else {
+ return (short) getTwoByteSlowly(segments, segSize, segIndex, segOffset);
+ }
+ }
+
+    /**
+     * Writes a 2-byte short into a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @param value the short to store.
+     */
+    public static void setShort(MemorySegment[] segments, int offset, short value) {
+        if (!inFirstSegment(segments, offset, 2)) {
+            setShortMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putShort(offset, value);
+    }
+
+ private static void setShortMultiSegments(MemorySegment[] segments, int offset, short value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 1) {
+ segments[segIndex].putShort(segOffset, value);
+ } else {
+ setTwoByteSlowly(segments, segSize, segIndex, segOffset, value, value >> 8);
+ }
+ }
+
+    /**
+     * Reads a 4-byte float from a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @return the float stored at {@code offset}.
+     */
+    public static float getFloat(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 4)
+                ? segments[0].getFloat(offset)
+                : getFloatMultiSegments(segments, offset);
+    }
+
+ private static float getFloatMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 3) {
+ return segments[segIndex].getFloat(segOffset);
+ } else {
+ return Float.intBitsToFloat(getIntSlowly(segments, segSize, segIndex, segOffset));
+ }
+ }
+
+    /**
+     * Writes a 4-byte float into a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @param value the float to store.
+     */
+    public static void setFloat(MemorySegment[] segments, int offset, float value) {
+        if (!inFirstSegment(segments, offset, 4)) {
+            setFloatMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putFloat(offset, value);
+    }
+
+ private static void setFloatMultiSegments(MemorySegment[] segments, int offset, float value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 3) {
+ segments[segIndex].putFloat(segOffset, value);
+ } else {
+ setIntSlowly(segments, segSize, segIndex, segOffset, Float.floatToRawIntBits(value));
+ }
+ }
+
+    /**
+     * Reads an 8-byte double from a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @return the double stored at {@code offset}.
+     */
+    public static double getDouble(MemorySegment[] segments, int offset) {
+        return inFirstSegment(segments, offset, 8)
+                ? segments[0].getDouble(offset)
+                : getDoubleMultiSegments(segments, offset);
+    }
+
+ private static double getDoubleMultiSegments(MemorySegment[] segments, int offset) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 7) {
+ return segments[segIndex].getDouble(segOffset);
+ } else {
+ return Double.longBitsToDouble(getLongSlowly(segments, segSize, segIndex, segOffset));
+ }
+ }
+
+    /**
+     * Writes an 8-byte double into a memory region that may span several segments.
+     *
+     * @param segments segments backing the region.
+     * @param offset byte offset of the value, counted from the start of the first segment.
+     * @param value the double to store.
+     */
+    public static void setDouble(MemorySegment[] segments, int offset, double value) {
+        if (!inFirstSegment(segments, offset, 8)) {
+            setDoubleMultiSegments(segments, offset, value);
+            return;
+        }
+        segments[0].putDouble(offset, value);
+    }
+
+ private static void setDoubleMultiSegments(MemorySegment[] segments, int offset, double value) {
+ int segSize = segments[0].size();
+ int segIndex = offset / segSize;
+ int segOffset = offset - segIndex * segSize; // equal to %
+
+ if (segOffset < segSize - 7) {
+ segments[segIndex].putDouble(segOffset, value);
+ } else {
+ setLongSlowly(
+ segments, segSize, segIndex, segOffset, Double.doubleToRawLongBits(value));
+ }
+ }
+
+ private static int getTwoByteSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset) {
+ MemorySegment segment = segments[segNum];
+ int ret = 0;
+ for (int i = 0; i < 2; i++) {
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ int unsignedByte = segment.get(segOffset) & 0xff;
+ if (LITTLE_ENDIAN) {
+ ret |= (unsignedByte << (i * 8));
+ } else {
+ ret |= (unsignedByte << ((1 - i) * 8));
+ }
+ segOffset++;
+ }
+ return ret;
+ }
+
+ private static void setTwoByteSlowly(
+ MemorySegment[] segments, int segSize, int segNum, int segOffset, int b1, int b2) {
+ MemorySegment segment = segments[segNum];
+ segment.put(segOffset, (byte) (LITTLE_ENDIAN ? b1 : b2));
+ segOffset++;
+ if (segOffset == segSize) {
+ segment = segments[++segNum];
+ segOffset = 0;
+ }
+ segment.put(segOffset, (byte) (LITTLE_ENDIAN ? b2 : b1));
+ }
+
+    /**
+     * Gets an instance of {@link DecimalData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset that the packed sub-offset is relative to.
+     * @param offsetAndSize packed long: high 32 bits are the sub-offset, low 32 bits the number
+     *     of unscaled bytes.
+     * @param precision precision passed on to {@link DecimalData}.
+     * @param scale scale passed on to {@link DecimalData}.
+     * @return the decimal built from the copied unscaled bytes.
+     */
+    public static DecimalData readDecimalData(
+            MemorySegment[] segments,
+            int baseOffset,
+            long offsetAndSize,
+            int precision,
+            int scale) {
+        final int length = (int) offsetAndSize;
+        final int relativeOffset = (int) (offsetAndSize >> 32);
+        final byte[] unscaled = new byte[length];
+        copyToBytes(segments, baseOffset + relativeOffset, unscaled, 0, length);
+        return DecimalData.fromUnscaledBytes(unscaled, precision, scale);
+    }
+
+    /**
+     * Gets an instance of {@link TimestampData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments
+     * @param baseOffset the base offset of current instance of {@code TimestampData}
+     * @param offsetAndNanos packed long: high 32 bits are the offset of the millisecond part
+     *     (relative to {@code baseOffset}), low 32 bits the nanoseconds of the millisecond
+     * @return an instance of {@link TimestampData}
+     */
+    public static TimestampData readTimestampData(
+            MemorySegment[] segments, int baseOffset, long offsetAndNanos) {
+        final int relativeOffset = (int) (offsetAndNanos >> 32);
+        final long millis = getLong(segments, baseOffset + relativeOffset);
+        return TimestampData.fromMillis(millis, (int) offsetAndNanos);
+    }
+
+    /**
+     * Gets an instance of {@link LocalZonedTimestampData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments
+     * @param baseOffset the base offset of current instance of {@code LocalZonedTimestampData}
+     * @param offsetAndNanos packed long: high 32 bits are the offset of the millisecond part
+     *     (relative to {@code baseOffset}), low 32 bits the nanoseconds of the millisecond
+     * @return an instance of {@link LocalZonedTimestampData}
+     */
+    public static LocalZonedTimestampData readLocalZonedTimestampData(
+            MemorySegment[] segments, int baseOffset, long offsetAndNanos) {
+        final int relativeOffset = (int) (offsetAndNanos >> 32);
+        final long epochMillis = getLong(segments, baseOffset + relativeOffset);
+        return LocalZonedTimestampData.fromEpochMillis(epochMillis, (int) offsetAndNanos);
+    }
+
+    /**
+     * Gets a binary value. Short values are stored inline in {@code variablePartOffsetAndLen}
+     * itself (flagged by the {@code HIGHEST_FIRST_BIT} mark); otherwise the long holds the
+     * offset (high 32 bits) and length (low 32 bits) of the data.
+     *
+     * <p>Note: Need to consider the ByteOrder.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset of composite binary format.
+     * @param fieldOffset absolute start offset of 'variablePartOffsetAndLen'.
+     * @param variablePartOffsetAndLen a long value, real data or offset and len.
+     * @return a copy of the bytes of the value.
+     */
+    public static byte[] readBinary(
+            MemorySegment[] segments,
+            int baseOffset,
+            int fieldOffset,
+            long variablePartOffsetAndLen) {
+        if ((variablePartOffsetAndLen & HIGHEST_FIRST_BIT) != 0) {
+            // Inline data: the length sits in the bits selected by the mask, top byte.
+            final int inlineLen =
+                    (int) ((variablePartOffsetAndLen & HIGHEST_SECOND_TO_EIGHTH_BIT) >>> 56);
+            // On big-endian platforms the header byte comes first and must be skipped.
+            final int dataStart = BinarySegmentUtils.LITTLE_ENDIAN ? fieldOffset : fieldOffset + 1;
+            return BinarySegmentUtils.copyToBytes(segments, dataStart, inlineLen);
+        }
+
+        final int relativeOffset = (int) (variablePartOffsetAndLen >> 32);
+        final int length = (int) variablePartOffsetAndLen;
+        return BinarySegmentUtils.copyToBytes(segments, baseOffset + relativeOffset, length);
+    }
+
+    /**
+     * Gets a binary string. Short values are stored inline in {@code variablePartOffsetAndLen}
+     * itself (flagged by the {@code HIGHEST_FIRST_BIT} mark); otherwise the long holds the
+     * offset (high 32 bits) and length (low 32 bits) of the data.
+     *
+     * <p>Note: Need to consider the ByteOrder.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset of composite binary format.
+     * @param fieldOffset absolute start offset of 'variablePartOffsetAndLen'.
+     * @param variablePartOffsetAndLen a long value, real data or offset and len.
+     * @return a string view pointing at the located bytes of {@code segments}.
+     */
+    public static StringData readStringData(
+            MemorySegment[] segments,
+            int baseOffset,
+            int fieldOffset,
+            long variablePartOffsetAndLen) {
+        if ((variablePartOffsetAndLen & HIGHEST_FIRST_BIT) != 0) {
+            // Inline data: the length sits in the bits selected by the mask, top byte.
+            final int inlineLen =
+                    (int) ((variablePartOffsetAndLen & HIGHEST_SECOND_TO_EIGHTH_BIT) >>> 56);
+            // On big-endian platforms the header byte comes first and must be skipped.
+            final int dataStart = BinarySegmentUtils.LITTLE_ENDIAN ? fieldOffset : fieldOffset + 1;
+            return BinaryStringData.fromAddress(segments, dataStart, inlineLen);
+        }
+
+        final int relativeOffset = (int) (variablePartOffsetAndLen >> 32);
+        final int length = (int) variablePartOffsetAndLen;
+        return BinaryStringData.fromAddress(segments, baseOffset + relativeOffset, length);
+    }
+
+    /**
+     * Gets an instance of {@link RecordData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param numFields number of fields of the record.
+     * @param baseOffset base offset that the packed offset is relative to.
+     * @param offsetAndSize packed long: high 32 bits are the offset, low 32 bits the size.
+     * @return a record pointing at the located region of {@code segments}.
+     */
+    public static RecordData readRecordData(
+            MemorySegment[] segments, int numFields, int baseOffset, long offsetAndSize) {
+        final int sizeInBytes = (int) offsetAndSize;
+        final int relativeOffset = (int) (offsetAndSize >> 32);
+        final BinaryRecordData record = new BinaryRecordData(numFields);
+        record.pointTo(segments, relativeOffset + baseOffset, sizeInBytes);
+        return record;
+    }
+
+    /**
+     * Gets an instance of {@link BinaryVariant} from underlying {@link MemorySegment}s.
+     *
+     * <p>The located region is read as a 4-byte metadata length (in {@link ByteBuffer}'s default
+     * byte order), followed by the metadata bytes, followed by the value bytes.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset that the packed offset is relative to.
+     * @param offsetAndSize packed long: high 32 bits are the offset, low 32 bits the total size.
+     * @return the variant built from the copied value and metadata bytes.
+     */
+    public static BinaryVariant readVariant(
+            MemorySegment[] segments, int baseOffset, long offsetAndSize) {
+        final int totalSize = (int) offsetAndSize;
+        final int relativeOffset = (int) (offsetAndSize >> 32);
+        final byte[] payload = copyToBytes(segments, relativeOffset + baseOffset, totalSize);
+
+        final ByteBuffer reader = ByteBuffer.wrap(payload);
+        final int metaLen = reader.getInt();
+        final byte[] meta = new byte[metaLen];
+        // Whatever follows the 4-byte length prefix and the metadata is the value.
+        final byte[] value = new byte[payload.length - 4 - metaLen];
+        reader.get(meta);
+        reader.get(value);
+
+        return new BinaryVariant(value, meta);
+    }
+
+    /**
+     * Searches the byte sequence of segments2 inside the byte sequence of segments1.
+     *
+     * @param segments1 segments to search in.
+     * @param offset1 start offset of the searched region in {@code segments1}.
+     * @param numBytes1 length of the searched region.
+     * @param segments2 segments holding the sequence to look for.
+     * @param offset2 start offset of the sequence in {@code segments2}.
+     * @param numBytes2 length of the sequence.
+     * @return the offset (in {@code segments1}) of the first match, {@code offset1} if the
+     *     sequence is empty, or -1 if there is no match.
+     */
+    public static int find(
+            MemorySegment[] segments1,
+            int offset1,
+            int numBytes1,
+            MemorySegment[] segments2,
+            int offset2,
+            int numBytes2) {
+        // Quick way 1: an empty sequence matches right at the start.
+        if (numBytes2 == 0) {
+            return offset1;
+        }
+        if (!(inFirstSegment(segments1, offset1, numBytes1)
+                && inFirstSegment(segments2, offset2, numBytes2))) {
+            return findInMultiSegments(
+                    segments1, offset1, numBytes1, segments2, offset2, numBytes2);
+        }
+
+        final MemorySegment haystack = segments1[0];
+        final MemorySegment needle = segments2[0];
+        final byte firstNeedleByte = needle.get(offset2);
+        final int lastStart = numBytes1 - numBytes2 + offset1;
+        for (int pos = offset1; pos <= lastStart; pos++) {
+            // Quick way 2: only run the full comparison when the first byte matches.
+            if (haystack.get(pos) != firstNeedleByte) {
+                continue;
+            }
+            if (haystack.equalTo(needle, pos, offset2, numBytes2)) {
+                return pos;
+            }
+        }
+        return -1;
+    }
+
+ private static int findInMultiSegments(
+ MemorySegment[] segments1,
+ int offset1,
+ int numBytes1,
+ MemorySegment[] segments2,
+ int offset2,
+ int numBytes2) {
+ int end = numBytes1 - numBytes2 + offset1;
+ for (int i = offset1; i <= end; i++) {
+ if (equalsMultiSegments(segments1, i, segments2, offset2, numBytes2)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+    /**
+     * Gets an instance of {@link MapData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset that the packed offset is relative to.
+     * @param offsetAndSize packed long: high 32 bits are the offset, low 32 bits the size.
+     * @return a map pointing at the located region of {@code segments}.
+     */
+    public static MapData readMapData(
+            MemorySegment[] segments, int baseOffset, long offsetAndSize) {
+        final int sizeInBytes = (int) offsetAndSize;
+        final int relativeOffset = (int) (offsetAndSize >> 32);
+        final BinaryMapData result = new BinaryMapData();
+        result.pointTo(segments, relativeOffset + baseOffset, sizeInBytes);
+        return result;
+    }
+
+    /**
+     * Gets an instance of {@link ArrayData} from underlying {@link MemorySegment}s.
+     *
+     * @param segments the underlying MemorySegments.
+     * @param baseOffset base offset that the packed offset is relative to.
+     * @param offsetAndSize packed long: high 32 bits are the offset, low 32 bits the size.
+     * @return an array pointing at the located region of {@code segments}.
+     */
+    public static ArrayData readArrayData(
+            MemorySegment[] segments, int baseOffset, long offsetAndSize) {
+        final int sizeInBytes = (int) offsetAndSize;
+        final int relativeOffset = (int) (offsetAndSize >> 32);
+        final BinaryArrayData result = new BinaryArrayData();
+        result.pointTo(segments, relativeOffset + baseOffset, sizeInBytes);
+        return result;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryStringData.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryStringData.java
new file mode 100644
index 00000000000..52af6a25c49
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/BinaryStringData.java
@@ -0,0 +1,875 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.cdc.common.utils.StringUtf8Utils;
+import org.apache.flink.core.memory.MemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+
+import javax.annotation.Nonnull;
+
+import java.util.Arrays;
+
+/**
+ * A lazily binary implementation of {@link StringData} which is backed by {@link MemorySegment}s
+ * and {@link String}.
+ *
+ * <p>Either {@link MemorySegment}s or {@link String} must be provided when constructing {@link
+ * BinaryStringData}. The other representation will be materialized when needed.
+ *
+ * <p>It provides many useful methods for comparison, search, and so on.
+ */
+@Internal
+public final class BinaryStringData extends LazyBinaryFormat implements StringData {
+
+ /** Shared instance backed by the UTF-8 encoding of the empty string. */
+ public static final BinaryStringData EMPTY_UTF8 =
+ BinaryStringData.fromBytes(StringUtf8Utils.encodeUTF8(""));
+
+ /** Creates an instance without passing any binary or Java representation to the parent. */
+ public BinaryStringData() {}
+
+ /**
+ * Creates an instance from a Java string only.
+ *
+ * @param javaObject the Java string representation, handed to the parent constructor.
+ */
+ public BinaryStringData(String javaObject) {
+ super(javaObject);
+ }
+
+ /**
+ * Creates an instance from a binary region only.
+ *
+ * @param segments segments holding the bytes of the string.
+ * @param offset start offset of the string within {@code segments}.
+ * @param sizeInBytes number of bytes of the string.
+ */
+ public BinaryStringData(MemorySegment[] segments, int offset, int sizeInBytes) {
+ super(segments, offset, sizeInBytes);
+ }
+
+ /**
+ * Creates an instance from both a binary region and a Java string.
+ *
+ * @param segments segments holding the bytes of the string.
+ * @param offset start offset of the string within {@code segments}.
+ * @param sizeInBytes number of bytes of the string.
+ * @param javaObject the Java string representation; presumably expected to describe the same
+ *     text as the binary region (TODO confirm, nothing here verifies it).
+ */
+ public BinaryStringData(
+ MemorySegment[] segments, int offset, int sizeInBytes, String javaObject) {
+ super(segments, offset, sizeInBytes, javaObject);
+ }
+
+ // ------------------------------------------------------------------------------------------
+ // Construction Utilities
+ // ------------------------------------------------------------------------------------------
+
+ /**
+ * Creates a {@link BinaryStringData} instance from the given address (base and offset) and
+ * length.
+ *
+ * @param segments segments holding the bytes of the string.
+ * @param offset start offset of the string within {@code segments}.
+ * @param numBytes number of bytes of the string.
+ * @return a new instance referring to the given region; no bytes are copied here.
+ */
+ public static BinaryStringData fromAddress(MemorySegment[] segments, int offset, int numBytes) {
+ return new BinaryStringData(segments, offset, numBytes);
+ }
+
+    /**
+     * Creates a {@link BinaryStringData} instance from the given Java string.
+     *
+     * @param str the Java string, may be {@code null}.
+     * @return a new instance wrapping {@code str}, or {@code null} if {@code str} is {@code null}.
+     */
+    public static BinaryStringData fromString(String str) {
+        return str == null ? null : new BinaryStringData(str);
+    }
+
+ /**
+ * Creates a {@link BinaryStringData} instance from the given UTF-8 bytes, using the whole
+ * array.
+ *
+ * @param bytes UTF-8 encoded bytes.
+ * @return a new instance covering {@code bytes[0 .. bytes.length)}.
+ */
+ public static BinaryStringData fromBytes(byte[] bytes) {
+ return fromBytes(bytes, 0, bytes.length);
+ }
+
+    /**
+     * Creates a {@link BinaryStringData} instance from the given UTF-8 bytes with offset and
+     * number of bytes. The array is wrapped into a single segment.
+     *
+     * @param bytes UTF-8 encoded bytes.
+     * @param offset start offset of the string within {@code bytes}.
+     * @param numBytes number of bytes of the string.
+     * @return a new instance backed by the wrapped array.
+     */
+    public static BinaryStringData fromBytes(byte[] bytes, int offset, int numBytes) {
+        final MemorySegment wrapped = MemorySegmentFactory.wrap(bytes);
+        final MemorySegment[] singleSegment = {wrapped};
+        return new BinaryStringData(singleSegment, offset, numBytes);
+    }
+
+    /**
+     * Creates a {@link BinaryStringData} instance that contains `length` spaces.
+     *
+     * @param length number of space characters.
+     * @return a new instance made of {@code length} ASCII spaces.
+     */
+    public static BinaryStringData blankString(int length) {
+        final byte[] padding = new byte[length];
+        for (int i = 0; i < padding.length; i++) {
+            padding[i] = (byte) ' ';
+        }
+        return fromBytes(padding);
+    }
+
+ // ------------------------------------------------------------------------------------------
+ // Public Interfaces
+ // ------------------------------------------------------------------------------------------
+
+ /**
+ * Returns the bytes of the binary representation of this string, materializing that
+ * representation first if necessary.
+ */
+ @Override
+ public byte[] toBytes() {
+ ensureMaterialized();
+ return BinarySegmentUtils.getBytes(
+ binarySection.segments, binarySection.offset, binarySection.sizeInBytes);
+ }
+
+    /**
+     * Two instances are equal when their Java strings are equal (if both already have one) or,
+     * otherwise, when their binary sections are equal. Any other type is never equal.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof BinaryStringData)) {
+            return false;
+        }
+        final BinaryStringData that = (BinaryStringData) o;
+
+        // Cheap path: both sides already carry the Java representation.
+        if (javaObject != null && that.javaObject != null) {
+            return javaObject.equals(that.javaObject);
+        }
+
+        ensureMaterialized();
+        that.ensureMaterialized();
+        return binarySection.equals(that.binarySection);
+    }
+
+ /** Hashes the binary section, materializing it first if necessary. */
+ @Override
+ public int hashCode() {
+ ensureMaterialized();
+ return binarySection.hashCode();
+ }
+
+    /**
+     * Returns the Java string, decoding it from the binary section on first use and caching the
+     * result in {@code javaObject}.
+     */
+    @Override
+    public String toString() {
+        if (javaObject != null) {
+            return javaObject;
+        }
+        final int size = binarySection.sizeInBytes;
+        final byte[] buffer = BinarySegmentUtils.allocateReuseBytes(size);
+        BinarySegmentUtils.copyToBytes(
+                binarySection.segments, binarySection.offset, buffer, 0, size);
+        javaObject = StringUtf8Utils.decodeUTF8(buffer, 0, size);
+        return javaObject;
+    }
+
+    /**
+     * Compares two strings by the unsigned byte order of their UTF-8 encodings; when one is a
+     * prefix of the other, the shorter one sorts first.
+     *
+     * <p>The comparison always runs on the binary form. {@link String#compareTo(String)} orders
+     * by UTF-16 code units, which ranks supplementary characters (stored as surrogates
+     * 0xD800-0xDFFF) below U+E000..U+FFFF, whereas UTF-8 byte order ranks them above. Taking a
+     * shortcut through the Java strings would therefore make the result depend on which
+     * representation happens to be materialized, and could yield an inconsistent ordering.
+     *
+     * @param o the string to compare with; must be a {@link BinaryStringData}.
+     * @return a negative value, zero, or a positive value if this string is less than, equal to,
+     *     or greater than {@code o}.
+     */
+    @Override
+    public int compareTo(@Nonnull StringData o) {
+        // BinaryStringData is the only implementation of StringData
+        BinaryStringData other = (BinaryStringData) o;
+
+        ensureMaterialized();
+        other.ensureMaterialized();
+        if (binarySection.segments.length == 1 && other.binarySection.segments.length == 1) {
+
+            int len = Math.min(binarySection.sizeInBytes, other.binarySection.sizeInBytes);
+            MemorySegment seg1 = binarySection.segments[0];
+            MemorySegment seg2 = other.binarySection.segments[0];
+
+            for (int i = 0; i < len; i++) {
+                // Mask to compare the bytes as unsigned values.
+                int res =
+                        (seg1.get(binarySection.offset + i) & 0xFF)
+                                - (seg2.get(other.binarySection.offset + i) & 0xFF);
+                if (res != 0) {
+                    return res;
+                }
+            }
+            return binarySection.sizeInBytes - other.binarySection.sizeInBytes;
+        }
+
+        // if there are multi segments.
+        return compareMultiSegments(other);
+    }
+
+ /**
+ * Compares this string with {@code other} byte by byte (unsigned) when at least one of them
+ * is spread over several segments. Two cursors walk the segments of both strings in
+ * lock-step; each round compares as many bytes as fit before either cursor hits the end of
+ * its current segment.
+ *
+ * @param other the string to compare with; its binary section must already be materialized.
+ * @return the difference of the first differing unsigned bytes, or the difference of the
+ *     sizes in bytes if one string is a prefix of the other.
+ */
+ private int compareMultiSegments(BinaryStringData other) {
+
+ // An empty string only differs from the other one by length.
+ if (binarySection.sizeInBytes == 0 || other.binarySection.sizeInBytes == 0) {
+ return binarySection.sizeInBytes - other.binarySection.sizeInBytes;
+ }
+
+ // Number of bytes that still have to be compared.
+ int len = Math.min(binarySection.sizeInBytes, other.binarySection.sizeInBytes);
+
+ MemorySegment seg1 = binarySection.segments[0];
+ MemorySegment seg2 = other.binarySection.segments[0];
+
+ // Every segment of a string is treated as having the size of its first segment.
+ int segmentSize = binarySection.segments[0].size();
+ int otherSegmentSize = other.binarySection.segments[0].size();
+
+ // Bytes available between the start offset and the end of the first segment; zero or
+ // negative when the start offset lies beyond the first segment.
+ int sizeOfFirst1 = segmentSize - binarySection.offset;
+ int sizeOfFirst2 = otherSegmentSize - other.binarySection.offset;
+
+ // Index of the next segment to move to, per string.
+ int varSegIndex1 = 1;
+ int varSegIndex2 = 1;
+
+ // find the first segment of this string.
+ while (sizeOfFirst1 <= 0) {
+ sizeOfFirst1 += segmentSize;
+ seg1 = binarySection.segments[varSegIndex1++];
+ }
+
+ // Same for the other string.
+ while (sizeOfFirst2 <= 0) {
+ sizeOfFirst2 += otherSegmentSize;
+ seg2 = other.binarySection.segments[varSegIndex2++];
+ }
+
+ // Start positions inside the segments found above.
+ int offset1 = segmentSize - sizeOfFirst1;
+ int offset2 = otherSegmentSize - sizeOfFirst2;
+
+ // Bytes comparable in this round: limited by both segments and by the remaining length.
+ int needCompare = Math.min(Math.min(sizeOfFirst1, sizeOfFirst2), len);
+
+ while (needCompare > 0) {
+ // compare in one segment.
+ for (int i = 0; i < needCompare; i++) {
+ int res = (seg1.get(offset1 + i) & 0xFF) - (seg2.get(offset2 + i) & 0xFF);
+ if (res != 0) {
+ return res;
+ }
+ }
+ // Everything up to the common length matched.
+ if (needCompare == len) {
+ break;
+ }
+ len -= needCompare;
+ // next segment
+ if (sizeOfFirst1 < sizeOfFirst2) { // I am smaller
+ seg1 = binarySection.segments[varSegIndex1++];
+ offset1 = 0;
+ offset2 += needCompare;
+ sizeOfFirst1 = segmentSize;
+ sizeOfFirst2 -= needCompare;
+ } else if (sizeOfFirst1 > sizeOfFirst2) { // other is smaller
+ seg2 = other.binarySection.segments[varSegIndex2++];
+ offset2 = 0;
+ offset1 += needCompare;
+ sizeOfFirst2 = otherSegmentSize;
+ sizeOfFirst1 -= needCompare;
+ } else { // same, should go ahead both.
+ seg1 = binarySection.segments[varSegIndex1++];
+ seg2 = other.binarySection.segments[varSegIndex2++];
+ offset1 = 0;
+ offset2 = 0;
+ sizeOfFirst1 = segmentSize;
+ sizeOfFirst2 = otherSegmentSize;
+ }
+ needCompare = Math.min(Math.min(sizeOfFirst1, sizeOfFirst2), len);
+ }
+
+ // Sanity check: the loop must have ended through the "everything matched" break.
+ Preconditions.checkArgument(needCompare == len);
+
+ // Common prefix is identical, so the shorter string sorts first.
+ return binarySection.sizeInBytes - other.binarySection.sizeInBytes;
+ }
+
+ // ------------------------------------------------------------------------------------------
+ // Public methods on BinaryStringData
+ // ------------------------------------------------------------------------------------------
+
+    /**
+     * Returns the number of UTF-8 code points in the string, found by hopping from one leading
+     * byte to the next.
+     */
+    public int numChars() {
+        ensureMaterialized();
+        if (!inFirstSegment()) {
+            return numCharsMultiSegs();
+        }
+        int count = 0;
+        int pos = 0;
+        while (pos < binarySection.sizeInBytes) {
+            count++;
+            pos += numBytesForFirstByte(getByteOneSegment(pos));
+        }
+        return count;
+    }
+
+ private int numCharsMultiSegs() {
+ int len = 0;
+ int segSize = binarySection.segments[0].size();
+ SegmentAndOffset index = firstSegmentAndOffset(segSize);
+ int i = 0;
+ while (i < binarySection.sizeInBytes) {
+ int charBytes = numBytesForFirstByte(index.value());
+ i += charBytes;
+ len++;
+ index.skipBytes(charBytes, segSize);
+ }
+ return len;
+ }
+
+    /**
+     * Returns the {@code byte} value at the specified index. A valid index ranges from {@code 0}
+     * to {@code binarySection.sizeInBytes - 1}.
+     *
+     * <p>NOTE(review): the index is not checked against the size of this string here; an
+     * out-of-range index is presumably only rejected when the underlying segment access rejects
+     * it - confirm before relying on an exception.
+     *
+     * @param index the index of the {@code byte} value.
+     * @return the {@code byte} value at the specified index of this UTF-8 bytes.
+     */
+    public byte byteAt(int index) {
+        ensureMaterialized();
+        final MemorySegment[] segments = binarySection.segments;
+        final int absolute = binarySection.offset + index;
+        final int segmentSize = segments[0].size();
+        if (absolute >= segmentSize) {
+            return segments[absolute / segmentSize].get(absolute % segmentSize);
+        }
+        return segments[0].get(absolute);
+    }
+
+ /** Returns the parent's segments after making sure the binary form is materialized. */
+ @Override
+ public MemorySegment[] getSegments() {
+ ensureMaterialized();
+ return super.getSegments();
+ }
+
+ /** Returns the parent's offset after making sure the binary form is materialized. */
+ @Override
+ public int getOffset() {
+ ensureMaterialized();
+ return super.getOffset();
+ }
+
+ /** Returns the parent's size in bytes after making sure the binary form is materialized. */
+ @Override
+ public int getSizeInBytes() {
+ ensureMaterialized();
+ return super.getSizeInBytes();
+ }
+
+ /**
+ * Makes sure the binary form is available. Passes {@code null} as serializer, which is the
+ * only value {@code materialize} of this class accepts.
+ */
+ public void ensureMaterialized() {
+ ensureMaterialized(null);
+ }
+
+ @Override
+ protected BinarySection materialize(TypeSerializer serializer) {
+ if (serializer != null) {
+ throw new IllegalArgumentException(
+ "BinaryStringData does not support custom serializers");
+ }
+
+ byte[] bytes = StringUtf8Utils.encodeUTF8(javaObject);
+ return new BinarySection(
+ new MemorySegment[] {MemorySegmentFactory.wrap(bytes)}, 0, bytes.length);
+ }
+
+    /**
+     * Copy a new {@code BinaryStringData}.
+     *
+     * @return a new instance backed by a fresh copy of the bytes of this string (in a single
+     *     segment, at offset 0) that carries over the current Java string, if any.
+     */
+    public BinaryStringData copy() {
+        ensureMaterialized();
+        final int size = binarySection.sizeInBytes;
+        final byte[] duplicate =
+                BinarySegmentUtils.copyToBytes(
+                        binarySection.segments, binarySection.offset, size);
+        final MemorySegment[] singleSegment = {MemorySegmentFactory.wrap(duplicate)};
+        return new BinaryStringData(singleSegment, 0, size, javaObject);
+    }
+
+    /**
+     * Returns a binary string that is a substring of this binary string. The substring begins at
+     * the specified {@code beginIndex} and extends to the character at index {@code endIndex - 1}.
+     * Both indexes count characters, not bytes.
+     *
+     * <p>Examples:
+     *
+     * <blockquote>
+     *
+     * <pre>
+     * fromString("hamburger").substring(4, 8) returns binary string "urge"
+     * fromString("smiles").substring(1, 5) returns binary string "mile"
+     * </pre>
+     *
+     * </blockquote>
+     *
+     * @param beginIndex the beginning index, inclusive.
+     * @param endIndex the ending index, exclusive.
+     * @return the specified substring, return EMPTY_UTF8 when index out of bounds instead of
+     *     StringIndexOutOfBoundsException.
+     */
+    public BinaryStringData substring(int beginIndex, int endIndex) {
+        ensureMaterialized();
+        if (endIndex <= beginIndex || beginIndex >= binarySection.sizeInBytes) {
+            return EMPTY_UTF8;
+        }
+        if (!inFirstSegment()) {
+            return substringMultiSegs(beginIndex, endIndex);
+        }
+
+        final MemorySegment segment = binarySection.segments[0];
+        final int base = binarySection.offset;
+        final int size = binarySection.sizeInBytes;
+
+        // Skip whole characters until the requested begin character is reached.
+        int bytePos = 0;
+        int charPos = 0;
+        for (; bytePos < size && charPos < beginIndex; charPos++) {
+            bytePos += numBytesForFirstByte(segment.get(bytePos + base));
+        }
+
+        // Keep walking until the end character; the bytes in between form the result.
+        final int startByte = bytePos;
+        for (; bytePos < size && charPos < endIndex; charPos++) {
+            bytePos += numBytesForFirstByte(segment.get(bytePos + base));
+        }
+
+        if (bytePos <= startByte) {
+            return EMPTY_UTF8;
+        }
+        final byte[] result = new byte[bytePos - startByte];
+        segment.get(base + startByte, result, 0, result.length);
+        return fromBytes(result);
+    }
+
+    /**
+     * Variant of {@code substring} that walks the data through a {@code SegmentAndOffset} cursor
+     * instead of reading only {@code segments[0]}.
+     *
+     * @param start index of the first character to include
+     * @param until index of the first character to exclude
+     * @return the selected characters, or {@code EMPTY_UTF8} when the range selects no bytes
+     */
+    private BinaryStringData substringMultiSegs(final int start, final int until) {
+        final int segmentSize = binarySection.segments[0].size();
+        final SegmentAndOffset cursor = firstSegmentAndOffset(segmentSize);
+        int bytePos = 0;
+        int charPos = 0;
+        // Skip the characters in front of start, keeping the cursor in step with bytePos.
+        for (; bytePos < binarySection.sizeInBytes && charPos < start; charPos++) {
+            final int width = numBytesForFirstByte(cursor.value());
+            bytePos += width;
+            cursor.skipBytes(width, segmentSize);
+        }
+
+        final int startByte = bytePos;
+        for (; bytePos < binarySection.sizeInBytes && charPos < until; charPos++) {
+            final int width = numBytesForFirstByte(cursor.value());
+            bytePos += width;
+            cursor.skipBytes(width, segmentSize);
+        }
+
+        if (bytePos <= startByte) {
+            return EMPTY_UTF8;
+        }
+        final byte[] selected =
+                BinarySegmentUtils.copyToBytes(
+                        binarySection.segments,
+                        binarySection.offset + startByte,
+                        bytePos - startByte);
+        return fromBytes(selected);
+    }
+
+    /**
+     * Checks whether the bytes of {@code s} occur somewhere inside the bytes of this string.
+     *
+     * @param s the sequence to search for
+     * @return {@code true} if {@code s} is empty or is found in this BinaryStringData, {@code
+     *     false} otherwise
+     */
+    public boolean contains(final BinaryStringData s) {
+        ensureMaterialized();
+        s.ensureMaterialized();
+        final int needleSize = s.binarySection.sizeInBytes;
+        // The empty sequence is contained in every string.
+        if (needleSize == 0) {
+            return true;
+        }
+        // A result of -1 from the search is treated as "not found".
+        return BinarySegmentUtils.find(
+                        binarySection.segments,
+                        binarySection.offset,
+                        binarySection.sizeInBytes,
+                        s.binarySection.segments,
+                        s.binarySection.offset,
+                        needleSize)
+                != -1;
+    }
+
+    /**
+     * Checks whether the bytes of this BinaryStringData begin with the bytes of {@code prefix}.
+     *
+     * @param prefix the prefix.
+     * @return {@code true} if the bytes of {@code prefix} match the leading bytes of this string,
+     *     which includes an empty prefix and a prefix with exactly the same bytes as this
+     *     string; {@code false} otherwise.
+     */
+    public boolean startsWith(final BinaryStringData prefix) {
+        ensureMaterialized();
+        prefix.ensureMaterialized();
+        // A prefix is a match anchored at byte position 0.
+        final int anchor = 0;
+        return matchAt(prefix, anchor);
+    }
+
+    /**
+     * Checks whether the bytes of this BinaryStringData end with the bytes of {@code suffix}.
+     *
+     * @param suffix the suffix.
+     * @return {@code true} if the bytes of {@code suffix} match the trailing bytes of this
+     *     string, which includes an empty suffix and a suffix with exactly the same bytes as
+     *     this string; {@code false} otherwise, in particular when {@code suffix} is longer than
+     *     this string.
+     */
+    public boolean endsWith(final BinaryStringData suffix) {
+        ensureMaterialized();
+        suffix.ensureMaterialized();
+        // Byte position where the suffix would have to start; negative when the suffix is longer
+        // than this string, which matchAt rejects.
+        final int anchor = binarySection.sizeInBytes - suffix.binarySection.sizeInBytes;
+        return matchAt(suffix, anchor);
+    }
+
+    /**
+     * Returns a copy of this string without its leading and trailing space bytes. Only the byte
+     * {@code 0x20} is removed; no other whitespace characters are considered.
+     *
+     * @return a string holding the bytes between the first and the last non-space byte
+     *     (inclusive), or {@code EMPTY_UTF8} if nothing but spaces is present.
+     */
+    public BinaryStringData trim() {
+        ensureMaterialized();
+        if (!inFirstSegment()) {
+            return trimMultiSegs();
+        }
+
+        final int size = this.binarySection.sizeInBytes;
+        // Index of the first byte that is not a space (0x20).
+        int first = 0;
+        while (first < size && getByteOneSegment(first) == 0x20) {
+            first++;
+        }
+        // Index of the last byte that is not a space (0x20).
+        int last = size - 1;
+        while (last >= first && getByteOneSegment(last) == 0x20) {
+            last--;
+        }
+        // first > last means the string was empty or consisted of spaces only.
+        return first > last ? EMPTY_UTF8 : copyBinaryStringInOneSeg(first, last - first + 1);
+    }
+
+    /**
+     * Variant of {@code trim} that reads the data through {@code SegmentAndOffset} cursors, one
+     * moving forward from the first byte and one moving backward from the last byte.
+     *
+     * @return the bytes between the first and the last non-space byte (inclusive), or {@code
+     *     EMPTY_UTF8} if nothing but spaces (0x20) is present.
+     */
+    private BinaryStringData trimMultiSegs() {
+        final int size = this.binarySection.sizeInBytes;
+        final int segmentSize = binarySection.segments[0].size();
+
+        // Advance past the spaces (0x20) on the left side.
+        int first = 0;
+        final SegmentAndOffset forward = firstSegmentAndOffset(segmentSize);
+        while (first < size && forward.value() == 0x20) {
+            first++;
+            forward.nextByte(segmentSize);
+        }
+
+        // Step back over the spaces (0x20) on the right side.
+        int last = size - 1;
+        final SegmentAndOffset backward = lastSegmentAndOffset(segmentSize);
+        while (last >= first && backward.value() == 0x20) {
+            last--;
+            backward.previousByte(segmentSize);
+        }
+
+        // first > last means there is no non-space byte at all.
+        return first > last ? EMPTY_UTF8 : copyBinaryString(first, last);
+    }
+
+    /**
+     * Finds the first occurrence of {@code str} in this string, beginning the search at the
+     * character position {@code fromIndex}.
+     *
+     * @param str the substring to search for.
+     * @param fromIndex the character index from which to start the search.
+     * @return the character index of the first occurrence found at or after {@code fromIndex},
+     *     or {@code -1} if there is no such occurrence. For an empty {@code str} the result is
+     *     always {@code 0}, whatever {@code fromIndex} is.
+     */
+    public int indexOf(BinaryStringData str, int fromIndex) {
+        ensureMaterialized();
+        str.ensureMaterialized();
+        final int needleSize = str.binarySection.sizeInBytes;
+        if (needleSize == 0) {
+            return 0;
+        }
+        if (!inFirstSegment()) {
+            return indexOfMultiSegs(str, fromIndex);
+        }
+
+        final int haystackSize = binarySection.sizeInBytes;
+        // bytePos is the position in bytes, charPos the same position counted in characters.
+        int bytePos = 0;
+        int charPos = 0;
+        for (; bytePos < haystackSize && charPos < fromIndex; charPos++) {
+            bytePos += numBytesForFirstByte(getByteOneSegment(bytePos));
+        }
+        do {
+            // Not enough bytes left to hold the needle.
+            if (bytePos + needleSize > haystackSize) {
+                return -1;
+            }
+            final boolean matches =
+                    BinarySegmentUtils.equals(
+                            binarySection.segments,
+                            binarySection.offset + bytePos,
+                            str.binarySection.segments,
+                            str.binarySection.offset,
+                            needleSize);
+            if (matches) {
+                return charPos;
+            }
+            // Move on to the start of the next character.
+            bytePos += numBytesForFirstByte(getByteOneSegment(bytePos));
+            charPos++;
+        } while (bytePos < haystackSize);
+
+        return -1;
+    }
+
+    /**
+     * Variant of {@code indexOf} that reads the leading byte of each character through a {@code
+     * SegmentAndOffset} cursor instead of {@code getByteOneSegment}.
+     *
+     * @param str the substring to search for; the caller has already handled the empty case
+     * @param fromIndex the character index from which to start the search
+     * @return the character index of the first occurrence found at or after {@code fromIndex},
+     *     or {@code -1} if there is none
+     */
+    private int indexOfMultiSegs(BinaryStringData str, int fromIndex) {
+        final int haystackSize = binarySection.sizeInBytes;
+        final int needleSize = str.binarySection.sizeInBytes;
+        final int segmentSize = binarySection.segments[0].size();
+        final SegmentAndOffset cursor = firstSegmentAndOffset(segmentSize);
+
+        // bytePos is the position in bytes, charPos the same position counted in characters.
+        int bytePos = 0;
+        int charPos = 0;
+        for (; bytePos < haystackSize && charPos < fromIndex; charPos++) {
+            final int width = numBytesForFirstByte(cursor.value());
+            bytePos += width;
+            cursor.skipBytes(width, segmentSize);
+        }
+        do {
+            // Not enough bytes left to hold the needle.
+            if (bytePos + needleSize > haystackSize) {
+                return -1;
+            }
+            final boolean matches =
+                    BinarySegmentUtils.equals(
+                            binarySection.segments,
+                            binarySection.offset + bytePos,
+                            str.binarySection.segments,
+                            str.binarySection.offset,
+                            needleSize);
+            if (matches) {
+                return charPos;
+            }
+            // Move on to the start of the next character.
+            final int width = numBytesForFirstByte(cursor.value());
+            bytePos += width;
+            charPos++;
+            cursor.skipBytes(width, segmentSize);
+        } while (bytePos < haystackSize);
+
+        return -1;
+    }
+
+    /**
+     * Converts all of the characters in this {@code BinaryStringData} to upper case.
+     *
+     * <p>When a Java object is present, or as soon as a byte is met that does not start a
+     * one-byte character or whose upper-case form does not fit into 7 bits, the conversion is
+     * delegated to {@code javaToUpperCase()}. Otherwise the bytes are converted one by one
+     * without building a {@link String}.
+     *
+     * @return the {@code BinaryStringData}, converted to uppercase.
+     */
+    public BinaryStringData toUpperCase() {
+        if (javaObject != null) {
+            return javaToUpperCase();
+        }
+        // NOTE(review): binarySection is dereferenced without ensureMaterialized(); this relies on
+        // the binary form being present whenever javaObject is null - confirm.
+        if (binarySection.sizeInBytes == 0) {
+            return EMPTY_UTF8;
+        }
+        int size = binarySection.segments[0].size();
+        SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
+        byte[] bytes = new byte[binarySection.sizeInBytes];
+        // Every slot of bytes, including index 0, is written by the loop below, so no
+        // pre-initialization of the first byte is needed.
+        for (int i = 0; i < binarySection.sizeInBytes; i++) {
+            byte b = segmentAndOffset.value();
+            if (numBytesForFirstByte(b) != 1) {
+                // fallback
+                return javaToUpperCase();
+            }
+            int upper = Character.toUpperCase((int) b);
+            if (upper > 127) {
+                // fallback
+                return javaToUpperCase();
+            }
+            bytes[i] = (byte) upper;
+            segmentAndOffset.nextByte(size);
+        }
+        return fromBytes(bytes);
+    }
+
+    /** Slow path of {@code toUpperCase()}: upper-cases the {@link String} form of this value. */
+    private BinaryStringData javaToUpperCase() {
+        final String upperCased = toString().toUpperCase();
+        return fromString(upperCased);
+    }
+
+    /**
+     * Converts all of the characters in this {@code BinaryStringData} to lower case.
+     *
+     * <p>When a Java object is present, or as soon as a byte is met that does not start a
+     * one-byte character or whose lower-case form does not fit into 7 bits, the conversion is
+     * delegated to {@code javaToLowerCase()}. Otherwise the bytes are converted one by one
+     * without building a {@link String}.
+     *
+     * @return the {@code BinaryStringData}, converted to lowercase.
+     */
+    public BinaryStringData toLowerCase() {
+        if (javaObject != null) {
+            return javaToLowerCase();
+        }
+        // NOTE(review): binarySection is dereferenced without ensureMaterialized(); this relies on
+        // the binary form being present whenever javaObject is null - confirm.
+        if (binarySection.sizeInBytes == 0) {
+            return EMPTY_UTF8;
+        }
+        int size = binarySection.segments[0].size();
+        SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
+        byte[] bytes = new byte[binarySection.sizeInBytes];
+        // Every slot of bytes, including index 0, is written by the loop below, so no
+        // pre-initialization of the first byte is needed.
+        for (int i = 0; i < binarySection.sizeInBytes; i++) {
+            byte b = segmentAndOffset.value();
+            if (numBytesForFirstByte(b) != 1) {
+                // fallback
+                return javaToLowerCase();
+            }
+            int lower = Character.toLowerCase((int) b);
+            if (lower > 127) {
+                // fallback
+                return javaToLowerCase();
+            }
+            bytes[i] = (byte) lower;
+            segmentAndOffset.nextByte(size);
+        }
+        return fromBytes(bytes);
+    }
+
+    /** Slow path of {@code toLowerCase()}: lower-cases the {@link String} form of this value. */
+    private BinaryStringData javaToLowerCase() {
+        final String lowerCased = toString().toLowerCase();
+        return fromString(lowerCased);
+    }
+
+ // ------------------------------------------------------------------------------------------
+ // Internal methods on BinaryStringData
+ // ------------------------------------------------------------------------------------------
+
+    /**
+     * Reads the byte at position {@code i} of this string from {@code segments[0]}. Callers in
+     * this class use it only after {@code inFirstSegment()} returned {@code true}.
+     */
+    byte getByteOneSegment(int i) {
+        final MemorySegment firstSegment = binarySection.segments[0];
+        return firstSegment.get(binarySection.offset + i);
+    }
+
+    /** Returns whether the end of the data does not reach beyond the size of {@code segments[0]}. */
+    boolean inFirstSegment() {
+        final int endOfData = binarySection.sizeInBytes + binarySection.offset;
+        return endOfData <= binarySection.segments[0].size();
+    }
+
+    /**
+     * Tests whether the bytes of {@code s} appear in this string starting at byte position
+     * {@code pos}. The single-segment comparison is used only when both strings report {@code
+     * inFirstSegment()}.
+     */
+    private boolean matchAt(final BinaryStringData s, int pos) {
+        if (inFirstSegment() && s.inFirstSegment()) {
+            return matchAtOneSeg(s, pos);
+        }
+        return matchAtVarSeg(s, pos);
+    }
+
+    /**
+     * Compares the bytes of {@code s} with the bytes of this string starting at byte position
+     * {@code pos}, reading both from their {@code segments[0]}.
+     *
+     * @return {@code false} if {@code pos} is negative or {@code s} does not fit at {@code pos};
+     *     otherwise the result of the byte comparison
+     */
+    private boolean matchAtOneSeg(final BinaryStringData s, int pos) {
+        final int otherSize = s.binarySection.sizeInBytes;
+        // Reject positions where s would start before or end after this string.
+        if (otherSize + pos > binarySection.sizeInBytes || pos < 0) {
+            return false;
+        }
+        return binarySection.segments[0].equalTo(
+                s.binarySection.segments[0],
+                binarySection.offset + pos,
+                s.binarySection.offset,
+                otherSize);
+    }
+
+    /**
+     * Compares the bytes of {@code s} with the bytes of this string starting at byte position
+     * {@code pos}, delegating to {@code BinarySegmentUtils.equals} on the segment arrays.
+     *
+     * @return {@code false} if {@code pos} is negative or {@code s} does not fit at {@code pos};
+     *     otherwise the result of the byte comparison
+     */
+    private boolean matchAtVarSeg(final BinaryStringData s, int pos) {
+        final int otherSize = s.binarySection.sizeInBytes;
+        // Reject positions where s would start before or end after this string.
+        if (otherSize + pos > binarySection.sizeInBytes || pos < 0) {
+            return false;
+        }
+        return BinarySegmentUtils.equals(
+                binarySection.segments,
+                binarySection.offset + pos,
+                s.binarySection.segments,
+                s.binarySection.offset,
+                otherSize);
+    }
+
+    /**
+     * Copies {@code len} bytes of this string, beginning at byte position {@code start}, out of
+     * {@code segments[0]} into a new string.
+     */
+    BinaryStringData copyBinaryStringInOneSeg(int start, int len) {
+        final byte[] copied = new byte[len];
+        final MemorySegment firstSegment = binarySection.segments[0];
+        firstSegment.get(binarySection.offset + start, copied, 0, len);
+        return fromBytes(copied);
+    }
+
+    /**
+     * Copies the bytes of this string from byte position {@code start} up to and including byte
+     * position {@code end} into a new string.
+     */
+    BinaryStringData copyBinaryString(int start, int end) {
+        // Both bounds are inclusive, hence the + 1.
+        final int length = end - start + 1;
+        final byte[] copied = new byte[length];
+        BinarySegmentUtils.copyToBytes(
+                binarySection.segments, binarySection.offset + start, copied, 0, length);
+        return fromBytes(copied);
+    }
+
+    /**
+     * Creates a cursor on the first byte of this string, treating every segment as {@code
+     * segSize} bytes long.
+     */
+    SegmentAndOffset firstSegmentAndOffset(int segSize) {
+        final int firstByte = binarySection.offset;
+        return new SegmentAndOffset(firstByte / segSize, firstByte % segSize);
+    }
+
+    /**
+     * Creates a cursor on the last byte of this string, treating every segment as {@code
+     * segSize} bytes long.
+     */
+    SegmentAndOffset lastSegmentAndOffset(int segSize) {
+        final int lastByte = binarySection.offset + binarySection.sizeInBytes - 1;
+        return new SegmentAndOffset(lastByte / segSize, lastByte % segSize);
+    }
+
+    /**
+     * Creates a cursor on the first byte of this string. When {@code inFirstSegment()} holds,
+     * the cursor is built directly on segment 0 without the division by {@code segSize}.
+     */
+    private SegmentAndOffset startSegmentAndOffset(int segSize) {
+        if (inFirstSegment()) {
+            return new SegmentAndOffset(0, binarySection.offset);
+        }
+        return firstSegmentAndOffset(segSize);
+    }
+
+    /**
+     * A movable cursor into {@code binarySection.segments}: the index of the current segment, the
+     * segment itself, and the byte position inside that segment.
+     *
+     * <p>When the cursor moves outside the segment array, {@code segment} becomes {@code null};
+     * {@link #value()} must not be called in that state.
+     */
+    class SegmentAndOffset {
+        int segIndex;
+        MemorySegment segment;
+        int offset;
+
+        private SegmentAndOffset(int segIndex, int offset) {
+            this.segIndex = segIndex;
+            this.offset = offset;
+            this.segment = binarySection.segments[segIndex];
+        }
+
+        /** Re-reads {@code segment} for the current index, or clears it when out of range. */
+        private void assignSegment() {
+            final MemorySegment[] all = binarySection.segments;
+            if (segIndex >= 0 && segIndex < all.length) {
+                segment = all[segIndex];
+            } else {
+                segment = null;
+            }
+        }
+
+        /** Moves one byte backward, wrapping to the last byte of the previous segment. */
+        void previousByte(int segSize) {
+            offset -= 1;
+            if (offset != -1) {
+                return;
+            }
+            segIndex -= 1;
+            assignSegment();
+            offset = segSize - 1;
+        }
+
+        /** Moves one byte forward, wrapping to the first byte of the next segment. */
+        void nextByte(int segSize) {
+            offset += 1;
+            checkAdvance(segSize);
+        }
+
+        /** Switches to the next segment if the position sits exactly at the segment end. */
+        private void checkAdvance(int segSize) {
+            if (offset == segSize) {
+                advance();
+            }
+        }
+
+        /** Moves to the first byte of the next segment. */
+        private void advance() {
+            segIndex += 1;
+            assignSegment();
+            offset = 0;
+        }
+
+        /** Moves {@code n} bytes forward, crossing as many segment boundaries as necessary. */
+        void skipBytes(int n, int segSize) {
+            int leftInSegment = segSize - this.offset;
+            // Consume whole segment remainders while more bytes are requested than are left in
+            // the current segment.
+            while (n > leftInSegment) {
+                n -= leftInSegment;
+                advance();
+                leftInSegment = segSize - this.offset;
+            }
+            // The rest fits into the current segment; landing exactly on its end moves the
+            // cursor to the start of the next one.
+            this.offset += n;
+            checkAdvance(segSize);
+        }
+
+        /** Returns the byte under the cursor. */
+        byte value() {
+            return this.segment.get(this.offset);
+        }
+    }
+
+    /**
+     * Determines how many bytes a UTF-8 encoded code point occupies, judging by its first byte.
+     *
+     * @param b The first byte of a code point
+     * @return 2, 3 or 4 for a valid leading byte of a multi-byte sequence, and 1 for a 7-bit
+     *     byte as well as for any byte that is not a valid leading byte
+     */
+    static int numBytesForFirstByte(final byte b) {
+        if (b >= 0) {
+            // 1 byte, 7 bits: 0xxxxxxx
+            return 1;
+        }
+        // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
+        // (the leading bytes 0xC0 and 0xC1 are rejected by the second condition)
+        if ((b & 0xE0) == 0xC0 && (b & 0x1E) != 0) {
+            return 2;
+        }
+        // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
+        if ((b & 0xF0) == 0xE0) {
+            return 3;
+        }
+        // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        if ((b & 0xF8) == 0xF0) {
+            return 4;
+        }
+        // Skip the first byte disallowed in UTF-8
+        // Handling errors quietly, same semantics to java String.
+        return 1;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/LazyBinaryFormat.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/LazyBinaryFormat.java
new file mode 100644
index 00000000000..fe05b042495
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/LazyBinaryFormat.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.core.memory.MemorySegment;
+
+import java.io.IOException;
+
+/**
+ * An abstract implementation of {@link BinaryFormat} which is lazily serialized into binary or
+ * lazily deserialized into Java object.
+ *
+ * <p>The reason why we introduce this data structure is in order to save (de)serialization in
+ * nested function calls. Consider the following function call chain:
+ *
+ * <pre>UDF0(input) -> UDF1(result0) -> UDF2(result1) -> UDF3(result2)</pre>
+ *
+ * <p>Such nested calls, if the return values of UDFs are Java object format, it will result in
+ * multiple conversions between Java object and binary format:
+ *
+ * <pre>
+ * converterToBinary(UDF0(converterToJavaObject(input))) ->
+ *   converterToBinary(UDF1(converterToJavaObject(result0))) ->
+ *     converterToBinary(UDF2(converterToJavaObject(result1))) ->
+ *       ...
+ * </pre>
+ *
+ * <p>So we introduced {@link LazyBinaryFormat} to avoid the redundant cost, it has three forms:
+ *
+ * <ul>
+ *   <li>Binary form
+ *   <li>Java object form
+ *   <li>Binary and Java object both exist
+ * </ul>
+ *
+ * <p>It defers the conversions as much as possible. It will be converted into the required form
+ * only when it is needed.
+ *
+ * @param <T> type of the Java object form
+ */
+@Internal
+public abstract class LazyBinaryFormat<T> implements BinaryFormat {
+
+    /** Java object form; {@code null} while only the binary form exists. */
+    T javaObject;
+
+    /** Binary form; {@code null} until it is supplied or materialized. */
+    BinarySection binarySection;
+
+    /** Creates an instance that has neither a Java object nor a binary form yet. */
+    public LazyBinaryFormat() {
+        this(null, null);
+    }
+
+    /** Creates an instance for which both the binary form and the Java object are known. */
+    public LazyBinaryFormat(MemorySegment[] segments, int offset, int sizeInBytes, T javaObject) {
+        this(javaObject, new BinarySection(segments, offset, sizeInBytes));
+    }
+
+    /** Creates an instance that only has a binary form. */
+    public LazyBinaryFormat(MemorySegment[] segments, int offset, int sizeInBytes) {
+        this(null, new BinarySection(segments, offset, sizeInBytes));
+    }
+
+    /** Creates an instance that only has a Java object form. */
+    public LazyBinaryFormat(T javaObject) {
+        this(javaObject, null);
+    }
+
+    /** Creates an instance from the given forms; either of them may be {@code null}. */
+    public LazyBinaryFormat(T javaObject, BinarySection binarySection) {
+        this.javaObject = javaObject;
+        this.binarySection = binarySection;
+    }
+
+    /** Returns the Java object form, or {@code null} if it is not present. */
+    public T getJavaObject() {
+        return javaObject;
+    }
+
+    /** Returns the binary form, or {@code null} if it has not been materialized. */
+    public BinarySection getBinarySection() {
+        return binarySection;
+    }
+
+    /** Must be public as it is used during code generation. */
+    public void setJavaObject(T javaObject) {
+        this.javaObject = javaObject;
+    }
+
+    /**
+     * Returns the memory segments of the binary form.
+     *
+     * @throws IllegalStateException if the binary form has not been materialized
+     */
+    @Override
+    public MemorySegment[] getSegments() {
+        if (binarySection == null) {
+            throw new IllegalStateException("Lazy Binary Format was not materialized");
+        }
+        return binarySection.segments;
+    }
+
+    /**
+     * Returns the offset of the binary form.
+     *
+     * @throws IllegalStateException if the binary form has not been materialized
+     */
+    @Override
+    public int getOffset() {
+        if (binarySection == null) {
+            throw new IllegalStateException("Lazy Binary Format was not materialized");
+        }
+        return binarySection.offset;
+    }
+
+    /**
+     * Returns the size in bytes of the binary form.
+     *
+     * @throws IllegalStateException if the binary form has not been materialized
+     */
+    @Override
+    public int getSizeInBytes() {
+        if (binarySection == null) {
+            throw new IllegalStateException("Lazy Binary Format was not materialized");
+        }
+        return binarySection.sizeInBytes;
+    }
+
+    /**
+     * Ensure we have materialized binary format. Does nothing when the binary form is already
+     * present.
+     *
+     * @param serializer serializer handed over to {@link #materialize(TypeSerializer)}
+     * @throws IllegalStateException wrapping the {@link IOException} if materialization fails
+     */
+    public final void ensureMaterialized(TypeSerializer<T> serializer) {
+        if (binarySection == null) {
+            try {
+                this.binarySection = materialize(serializer);
+            } catch (IOException e) {
+                throw new IllegalStateException(e);
+            }
+        }
+    }
+
+    /**
+     * Materialize java object to binary format. Inherited classes need to hold the information they
+     * need.
+     *
+     * @param serializer serializer passed through from {@link #ensureMaterialized(TypeSerializer)}
+     * @return the binary form of the Java object
+     * @throws IOException if the conversion fails
+     */
+    protected abstract BinarySection materialize(TypeSerializer<T> serializer) throws IOException;
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/MurmurHashUtils.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/MurmurHashUtils.java
new file mode 100644
index 00000000000..9caa30dfc3b
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/data/binary/MurmurHashUtils.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.data.binary;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+import org.apache.flink.core.memory.MemorySegment;
+
+import static org.apache.flink.core.memory.MemoryUtils.UNSAFE;
+
+/**
+ * Murmur Hash. This is inspired by Guava's Murmur3_32HashFunction.
+ *
+ * <p>All public entry points hash with {@link #DEFAULT_SEED}.
+ */
+@Internal
+final class MurmurHashUtils {
+
+    private static final int C1 = 0xcc9e2d51;
+    private static final int C2 = 0x1b873593;
+    public static final int DEFAULT_SEED = 42;
+
+    private MurmurHashUtils() {
+        // do not instantiate
+    }
+
+    /**
+     * Hashes a region read through {@code UNSAFE} four bytes at a time; the length must be
+     * aligned to 4 bytes.
+     *
+     * @param base base unsafe object
+     * @param offset offset for unsafe object
+     * @param lengthInBytes length in bytes
+     * @return hash code
+     */
+    public static int hashUnsafeBytesByWords(Object base, long offset, int lengthInBytes) {
+        return hashUnsafeBytesByWords(base, offset, lengthInBytes, DEFAULT_SEED);
+    }
+
+    /**
+     * Hashes a region read through {@code UNSAFE}; the length does not have to be aligned.
+     *
+     * @param base base unsafe object
+     * @param offset offset for unsafe object
+     * @param lengthInBytes length in bytes
+     * @return hash code
+     */
+    public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
+        return hashUnsafeBytes(base, offset, lengthInBytes, DEFAULT_SEED);
+    }
+
+    /**
+     * Hashes bytes of a MemorySegment four bytes at a time; the length must be aligned to 4
+     * bytes.
+     *
+     * @param segment segment.
+     * @param offset offset for MemorySegment
+     * @param lengthInBytes length in MemorySegment
+     * @return hash code
+     */
+    public static int hashBytesByWords(MemorySegment segment, int offset, int lengthInBytes) {
+        return hashBytesByWords(segment, offset, lengthInBytes, DEFAULT_SEED);
+    }
+
+    /**
+     * Hashes bytes of a MemorySegment; the length does not have to be aligned.
+     *
+     * @param segment segment.
+     * @param offset offset for MemorySegment
+     * @param lengthInBytes length in MemorySegment
+     * @return hash code
+     */
+    public static int hashBytes(MemorySegment segment, int offset, int lengthInBytes) {
+        return hashBytes(segment, offset, lengthInBytes, DEFAULT_SEED);
+    }
+
+    private static int hashUnsafeBytesByWords(
+            Object base, long offset, int lengthInBytes, int seed) {
+        final int body = hashUnsafeBytesByInt(base, offset, lengthInBytes, seed);
+        return fmix(body, lengthInBytes);
+    }
+
+    private static int hashBytesByWords(
+            MemorySegment segment, int offset, int lengthInBytes, int seed) {
+        final int body = hashBytesByInt(segment, offset, lengthInBytes, seed);
+        return fmix(body, lengthInBytes);
+    }
+
+    private static int hashBytes(MemorySegment segment, int offset, int lengthInBytes, int seed) {
+        // The aligned part is hashed four bytes at a time, the tail byte by byte.
+        final int alignedLength = lengthInBytes - lengthInBytes % 4;
+        int hash = hashBytesByInt(segment, offset, alignedLength, seed);
+        for (int pos = alignedLength; pos < lengthInBytes; pos++) {
+            hash = mixH1(hash, mixK1(segment.get(offset + pos)));
+        }
+        return fmix(hash, lengthInBytes);
+    }
+
+    private static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
+        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
+        // The aligned part is hashed four bytes at a time, the tail byte by byte.
+        final int alignedLength = lengthInBytes - lengthInBytes % 4;
+        int hash = hashUnsafeBytesByInt(base, offset, alignedLength, seed);
+        for (int pos = alignedLength; pos < lengthInBytes; pos++) {
+            final int tailByte = UNSAFE.getByte(base, offset + pos);
+            hash = mixH1(hash, mixK1(tailByte));
+        }
+        return fmix(hash, lengthInBytes);
+    }
+
+    private static int hashUnsafeBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
+        assert (lengthInBytes % 4 == 0);
+        int hash = seed;
+        for (int pos = 0; pos < lengthInBytes; pos += 4) {
+            final int word = UNSAFE.getInt(base, offset + pos);
+            hash = mixH1(hash, mixK1(word));
+        }
+        return hash;
+    }
+
+    private static int hashBytesByInt(
+            MemorySegment segment, int offset, int lengthInBytes, int seed) {
+        assert (lengthInBytes % 4 == 0);
+        int hash = seed;
+        for (int pos = 0; pos < lengthInBytes; pos += 4) {
+            final int word = segment.getInt(offset + pos);
+            hash = mixH1(hash, mixK1(word));
+        }
+        return hash;
+    }
+
+    /** Scrambles one input block before it is merged into the running hash. */
+    private static int mixK1(int k1) {
+        return Integer.rotateLeft(k1 * C1, 15) * C2;
+    }
+
+    /** Merges a scrambled block into the running hash. */
+    private static int mixH1(int h1, int k1) {
+        return Integer.rotateLeft(h1 ^ k1, 13) * 5 + 0xe6546b64;
+    }
+
+    // Finalization mix - force all bits of a hash block to avalanche
+    private static int fmix(int h1, int length) {
+        return fmix(h1 ^ length);
+    }
+
+    /** Finalization mix for a 32-bit value. */
+    public static int fmix(int h) {
+        h = (h ^ (h >>> 16)) * 0x85ebca6b;
+        h = (h ^ (h >>> 13)) * 0xc2b2ae35;
+        return h ^ (h >>> 16);
+    }
+
+    /** Finalization mix for a 64-bit value. */
+    public static long fmix(long h) {
+        h = (h ^ (h >>> 33)) * 0xff51afd7ed558ccdL;
+        h = (h ^ (h >>> 33)) * 0xc4ceb9fe1a85ec53L;
+        return h ^ (h >>> 33);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/Factory.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/Factory.java
new file mode 100644
index 00000000000..b8a13b7443c
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/Factory.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.factories;
+
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+
+import java.util.Set;
+
+/**
+ * Base interface for all kind of factories that create object instances from a list of key-value
+ * pairs in Flink CDC DataSource and DataSink API.
+ *
+ * <p>A factory is uniquely identified by {@link Class} and {@link #identifier()}.
+ *
+ * <p>The list of available factories is discovered using Java's Service Provider Interfaces (SPI).
+ * Classes that implement this interface can be added to {@code
+ * META-INF/services/org.apache.flink.cdc.common.factories.Factory} in JAR files.
+ *
+ * <p>Every factory declares a set of required and optional options. This information will not be
+ * used during discovery but is helpful when generating documentation and performing validation. A
+ * factory may discover further (nested) factories, the options of the nested factories must not be
+ * declared in the sets of this factory.
+ *
+ * <p>It is the responsibility of each factory to perform validation before returning an instance.
+ */
+@PublicEvolving
+public interface Factory {
+
+    /** Returns a unique identifier among same factory interfaces. */
+    String identifier();
+
+    /**
+     * Returns a set of {@link ConfigOption} that an implementation of this factory requires in
+     * addition to {@link #optionalOptions()}.
+     */
+    Set<ConfigOption<?>> requiredOptions();
+
+    /**
+     * Returns a set of {@link ConfigOption} that an implementation of this factory consumes in
+     * addition to {@link #requiredOptions()}.
+     */
+    Set<ConfigOption<?>> optionalOptions();
+
+    /** Provides session information describing the factory to be accessed. */
+    @PublicEvolving
+    interface Context {
+
+        /**
+         * Returns the factory options used to create the object instances.
+         *
+         * @return options of the current session.
+         */
+        Configuration getFactoryConfiguration();
+
+        /** Returns the configuration of current pipeline. */
+        Configuration getPipelineConfiguration();
+
+        /**
+         * Returns the class loader of the current session.
+         *
+         * <p>The class loader is in particular useful for discovering factories.
+         */
+        ClassLoader getClassLoader();
+
+        /**
+         * Returns the flink configuration of the current session.
+         *
+         * <p>The default implementation returns a new, empty configuration.
+         */
+        default ReadableConfig getFlinkConf() {
+            return new org.apache.flink.configuration.Configuration();
+        }
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/FactoryHelper.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/FactoryHelper.java
new file mode 100644
index 00000000000..8e4ff02276e
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/factories/FactoryHelper.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.factories;
+
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.Configuration;
+import org.apache.flink.cdc.common.configuration.FallbackKey;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.table.api.ValidationException;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+/** A helper for working with {@link Factory}. */
+@PublicEvolving
+public class FactoryHelper {
+
+    private final Factory factory;
+    private final Factory.Context context;
+
+    private FactoryHelper(Factory factory, Factory.Context context) {
+        this.factory = factory;
+        this.context = context;
+    }
+
+    /** Creates a helper for the given factory and the context it is used in. */
+    public static FactoryHelper createFactoryHelper(Factory factory, Factory.Context context) {
+        return new FactoryHelper(factory, context);
+    }
+
+    /**
+     * Validates the required and optional {@link ConfigOption}s of a factory.
+     *
+     * <p>Note: It does not check for left-over options.
+     */
+    public static void validateFactoryOptions(Factory factory, Configuration configuration) {
+        validateFactoryOptions(factory.requiredOptions(), factory.optionalOptions(), configuration);
+    }
+
+    /**
+     * Validates the required options and optional options.
+     *
+     * <p>Note: It does not check for left-over options.
+     *
+     * @param requiredOptions options that must have a value in {@code configuration}
+     * @param optionalOptions options that are read from {@code configuration} if present
+     * @param configuration configuration to validate
+     * @throws ValidationException if at least one required option has no value; the message lists
+     *     the keys and fallback keys of all such options
+     */
+    public static void validateFactoryOptions(
+            Set<ConfigOption<?>> requiredOptions,
+            Set<ConfigOption<?>> optionalOptions,
+            Configuration configuration) {
+        final List<String> missingRequiredOptions =
+                requiredOptions.stream()
+                        .filter(option -> configuration.get(option) == null)
+                        .flatMap(FactoryHelper::allKeys)
+                        .sorted()
+                        .collect(Collectors.toList());
+
+        if (!missingRequiredOptions.isEmpty()) {
+            throw new ValidationException(
+                    String.format(
+                            "One or more required options are missing.\n\n"
+                                    + "Missing required options are:\n\n"
+                                    + "%s",
+                            String.join("\n", missingRequiredOptions)));
+        }
+
+        // The results are discarded; each optional option is only read once here.
+        // NOTE(review): presumably this surfaces malformed values early - confirm against
+        // Configuration#getOptional.
+        optionalOptions.forEach(configuration::getOptional);
+    }
+
+    /**
+     * Validates unconsumed option keys.
+     *
+     * @param factoryIdentifier identifier used in the error message
+     * @param allOptionKeys keys to check
+     * @param consumedOptionKeys keys that are accepted; reported as "Supported options"
+     * @throws ValidationException if {@code allOptionKeys} contains a key that is not part of
+     *     {@code consumedOptionKeys}
+     */
+    public static void validateUnconsumedKeys(
+            String factoryIdentifier, Set<String> allOptionKeys, Set<String> consumedOptionKeys) {
+        final Set<String> remainingOptionKeys = new HashSet<>(allOptionKeys);
+        remainingOptionKeys.removeAll(consumedOptionKeys);
+        if (!remainingOptionKeys.isEmpty()) {
+            throw new ValidationException(
+                    String.format(
+                            "Unsupported options found for '%s'.\n\n"
+                                    + "Unsupported options:\n\n"
+                                    + "%s\n\n"
+                                    + "Supported options:\n\n"
+                                    + "%s",
+                            factoryIdentifier,
+                            remainingOptionKeys.stream().sorted().collect(Collectors.joining("\n")),
+                            String.join("\n", consumedOptionKeys)));
+        }
+    }
+
+    /** Validates the options of the factory. It checks for unconsumed option keys. */
+    public void validate() {
+        Set<String> allOptionKeys = declaredOptionKeys();
+
+        validateFactoryOptions(factory, context.getFactoryConfiguration());
+        validateUnconsumedKeys(
+                factory.identifier(), context.getFactoryConfiguration().getKeys(), allOptionKeys);
+    }
+
+    /**
+     * Validates the options of the factory. It checks for unconsumed option keys while ignoring the
+     * options with given prefixes.
+     *
+     * <p>The option keys that have given prefix {@code prefixToSkip} would just be skipped for
+     * validation.
+     *
+     * @param prefixesToSkip Set of option key prefixes to skip validation
+     * @throws IllegalArgumentException presumably, if no prefix is given (raised by {@code
+     *     Preconditions.checkArgument}; confirm the exact exception type there)
+     */
+    public void validateExcept(String... prefixesToSkip) {
+        Preconditions.checkArgument(
+                prefixesToSkip.length > 0, "Prefixes to skip can not be empty.");
+
+        final List<String> prefixesList = Arrays.asList(prefixesToSkip);
+
+        Set<String> allOptionKeys = declaredOptionKeys();
+
+        // Only the keys that start with none of the given prefixes take part in the check.
+        Set<String> filteredOptionKeys =
+                context.getFactoryConfiguration().getKeys().stream()
+                        .filter(key -> prefixesList.stream().noneMatch(key::startsWith))
+                        .collect(Collectors.toSet());
+
+        validateFactoryOptions(factory, context.getFactoryConfiguration());
+        validateUnconsumedKeys(factory.identifier(), filteredOptionKeys, allOptionKeys);
+    }
+
+    /** Collects the keys and fallback keys of all required and optional options of the factory. */
+    private Set<String> declaredOptionKeys() {
+        return Stream.concat(
+                        factory.requiredOptions().stream().flatMap(FactoryHelper::allKeys),
+                        factory.optionalOptions().stream().flatMap(FactoryHelper::allKeys))
+                .collect(Collectors.toSet());
+    }
+
+    /** Returns the key of the option followed by all of its fallback keys. */
+    private static Stream<String> allKeys(ConfigOption<?> option) {
+        return Stream.concat(
+                Stream.of(option.key()),
+                StreamSupport.stream(option.fallbackKeys().spliterator(), false)
+                        .map(FallbackKey::getKey));
+    }
+
+    /**
+     * Extracts the entries of the factory configuration whose keys start with {@code formatPrefix}
+     * followed by a dot, and returns them with that prefix removed from the keys.
+     *
+     * @param formatPrefix key prefix, without the trailing dot
+     * @return a Flink configuration holding the matching entries
+     */
+    public ReadableConfig getFormatConfig(String formatPrefix) {
+        final String prefix = formatPrefix + ".";
+        Map<String, String> formatConfigMap = new HashMap<>();
+        context.getFactoryConfiguration()
+                .toMap()
+                .forEach(
+                        (k, v) -> {
+                            if (k.startsWith(prefix)) {
+                                formatConfigMap.put(k.substring(prefix.length()), v);
+                            }
+                        });
+        return org.apache.flink.configuration.Configuration.fromMap(formatConfigMap);
+    }
+
+    /** Default implementation of {@link Factory.Context}. */
+    public static class DefaultContext implements Factory.Context {
+
+        private final Configuration factoryConfiguration;
+        private final ClassLoader classLoader;
+        private final Configuration pipelineConfiguration;
+        private final ReadableConfig flinkConf;
+
+        /** Creates a context that uses a new, empty Flink configuration. */
+        public DefaultContext(
+                Configuration factoryConfiguration,
+                Configuration pipelineConfiguration,
+                ClassLoader classLoader) {
+            this(
+                    factoryConfiguration,
+                    pipelineConfiguration,
+                    classLoader,
+                    new org.apache.flink.configuration.Configuration());
+        }
+
+        /** Creates a context from the given configurations and class loader. */
+        public DefaultContext(
+                Configuration factoryConfiguration,
+                Configuration pipelineConfiguration,
+                ClassLoader classLoader,
+                ReadableConfig flinkConf) {
+            this.factoryConfiguration = factoryConfiguration;
+            this.pipelineConfiguration = pipelineConfiguration;
+            this.classLoader = classLoader;
+            this.flinkConf = flinkConf;
+        }
+
+        @Override
+        public Configuration getFactoryConfiguration() {
+            return factoryConfiguration;
+        }
+
+        @Override
+        public Configuration getPipelineConfiguration() {
+            return pipelineConfiguration;
+        }
+
+        @Override
+        public ClassLoader getClassLoader() {
+            return classLoader;
+        }
+
+        @Override
+        public ReadableConfig getFlinkConf() {
+            return flinkConf;
+        }
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/route/TableIdRouter.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/route/TableIdRouter.java
new file mode 100755
index 00000000000..816039939ef
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/route/TableIdRouter.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.route;
+
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.schema.Selectors;
+
+import org.apache.flink.shaded.guava33.com.google.common.cache.CacheBuilder;
+import org.apache.flink.shaded.guava33.com.google.common.cache.CacheLoader;
+import org.apache.flink.shaded.guava33.com.google.common.cache.LoadingCache;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nonnull;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
+
+/**
+ * Calculates how upstream data change events should be dispatched to downstream tables. Returns one
+ * or many destination Table IDs based on provided routing rules.
+ */
+@PublicEvolving
+public class TableIdRouter {
+
+    private static final Logger LOG = LoggerFactory.getLogger(TableIdRouter.class);
+    private static final Duration CACHE_EXPIRE_DURATION = Duration.ofDays(1);
+
+    /** Compiled rules as (source table pattern, sink table expression, replace symbol or null). */
+    private final List<Tuple3<Pattern, String, String>> routes;
+
+    /** Routing results per source table; an entry expires one day after its last access. */
+    private final LoadingCache<TableId, List<TableId>> routingCache;
+
+    /** Temporary stand-in for escaped dots while the unescaped ones are being rewritten. */
+    private static final String DOT_PLACEHOLDER = "_dot_placeholder_";
+
+    /**
+     * Converts a CDC-style table capture list into a standard regular expression.
+     *
+     * <p>Currently, the supported regular syntax is not exactly the same as in {@link Selectors}.
+     *
+     * <p>The main discrepancies are:
+     *
+     * <p>1) {@link Selectors} use {@code ,} to split table names instead of {@code |}.
+     *
+     * <p>2) If there is a need to use a dot ({@code .}) in a regular expression to match any
+     * character, it is necessary to escape the dot with a backslash.
+     *
+     * <p>3) The unescaped {@code .} is used as the separator of database and table name. When
+     * converting to Debezium style, it is expected to be escaped to match the dot ({@code .})
+     * literally instead of the meta-character.
+     *
+     * @param tables comma-separated CDC-style table expressions
+     * @return the same expressions joined with {@code |}, with escaped and unescaped dots swapped
+     */
+    public static String convertTableListToRegExpPattern(String tables) {
+        LOG.info("Rewriting CDC style table capture list: {}", tables);
+
+        // In CDC-style table matching, table names could be separated by `,` character.
+        // Convert it to `|` as it's standard RegEx syntax.
+        tables =
+                Arrays.stream(tables.split(",")).map(String::trim).collect(Collectors.joining("|"));
+        LOG.info("Expression after replacing comma with vert separator: {}", tables);
+
+        // Essentially, we're just trying to swap escaped `\\.` and unescaped `.`.
+        // In our table matching syntax, `\\.` means RegEx token matcher and `.` means database &
+        // table name separator.
+        // On the contrary, while we're matching TableId string, `\\.` means matching the "dot"
+        // literal and `.` is the meta-character.
+
+        // Step 1: park every escaped dot (`\.`) behind a placeholder (shown as `$` below).
+        // For example, `db\.*.tbl\.*` => `db$*.tbl$*`
+        String unescapedTables = tables.replace("\\.", DOT_PLACEHOLDER);
+        LOG.info("Expression after un-escaping dots as RegEx meta-character: {}", unescapedTables);
+
+        // Step 2: replace all remaining dots (`.`) to quoted version (`\.`), as a separator between
+        // database and table names.
+        // For example, `db$*.tbl$*` => `db$*\.tbl$*`
+        String unescapedTablesWithDbTblSeparator = unescapedTables.replace(".", "\\.");
+        LOG.info("Re-escaping dots as TableId delimiter: {}", unescapedTablesWithDbTblSeparator);
+
+        // Step 3: restore placeholder to normal RegEx matcher (`.`)
+        // For example, `db$*\.tbl$*` => `db.*\.tbl.*`
+        String standardRegExpTableCaptureList =
+                unescapedTablesWithDbTblSeparator.replace(DOT_PLACEHOLDER, ".");
+        LOG.info("Final standard RegExp table capture list: {}", standardRegExpTableCaptureList);
+
+        return standardRegExpTableCaptureList;
+    }
+
+    /**
+     * Compiles every routing rule's source table expression and sets up the routing cache.
+     *
+     * @param routingRules rules to compile, kept in the given order
+     * @throws IllegalArgumentException if a rule's source table expression is not a valid regular
+     *     expression after conversion
+     */
+    public TableIdRouter(List<RouteRule> routingRules) {
+        this.routes = new ArrayList<>();
+        for (RouteRule rule : routingRules) {
+            try {
+                routes.add(
+                        new Tuple3<>(
+                                Pattern.compile(convertTableListToRegExpPattern(rule.sourceTable)),
+                                rule.sinkTable,
+                                rule.replaceSymbol));
+            } catch (PatternSyntaxException e) {
+                throw new IllegalArgumentException(
+                        String.format(
+                                "Failed to parse regular expression in routing rule %s. Notice that `.` is used to separate Table ID components. To use it as a regex token, put a `\\` before to escape it.",
+                                rule),
+                        e);
+            }
+        }
+        this.routingCache =
+                CacheBuilder.newBuilder()
+                        .expireAfterAccess(CACHE_EXPIRE_DURATION)
+                        .build(
+                                new CacheLoader<TableId, List<TableId>>() {
+                                    @Override
+                                    public @Nonnull List<TableId> load(@Nonnull TableId key) {
+                                        return calculateRoute(key);
+                                    }
+                                });
+    }
+
+    /**
+     * Returns the destination tables for a source table, computing them on first request and
+     * serving them from the cache afterwards.
+     *
+     * @param sourceTableId upstream table to route
+     * @return the routed table IDs, or a list holding only {@code sourceTableId} if no rule matches
+     */
+    public List<TableId> route(TableId sourceTableId) {
+        return routingCache.getUnchecked(sourceTableId);
+    }
+
+    /** Applies every matching rule in declaration order; falls back to the source table itself. */
+    private List<TableId> calculateRoute(TableId sourceTableId) {
+        // Collected into an explicit ArrayList because the list is appended to below.
+        List<TableId> routedTableIds =
+                routes.stream()
+                        .filter(route -> matches(route.f0, sourceTableId))
+                        .map(route -> resolveReplacement(sourceTableId, route))
+                        .collect(Collectors.toCollection(ArrayList::new));
+        if (routedTableIds.isEmpty()) {
+            routedTableIds.add(sourceTableId);
+        }
+        return routedTableIds;
+    }
+
+    /**
+     * Builds the sink table ID for one matching rule.
+     *
+     * <p>With a replace symbol, each occurrence of the symbol in the sink expression is replaced by
+     * the source table name. Without one, the sink expression is used as the replacement string of
+     * a regex replace over the source table ID; if the pattern is not found, the sink expression is
+     * parsed as-is.
+     */
+    private TableId resolveReplacement(
+            TableId originalTable, Tuple3<Pattern, String, String> route) {
+        if (route.f2 != null) {
+            return TableId.parse(route.f1.replace(route.f2, originalTable.getTableName()));
+        } else {
+            Matcher matcher = route.f0.matcher(originalTable.toString());
+            if (matcher.find()) {
+                return TableId.parse(matcher.replaceAll(route.f1));
+            }
+        }
+        return TableId.parse(route.f1);
+    }
+
+    /**
+     * Group the source tables that conform to the same routing rule together. The total number of
+     * groups is less than or equal to the number of routing rules. For the source tables within
+     * each group, their table structures will be merged to obtain the widest table structure in
+     * that group. The structures of all tables within the group will be expanded to this widest
+     * table structure.
+     *
+     * @param tableIdSet The tables need to be grouped by the router
+     * @return The tables grouped by the router
+     */
+    public List<Set<TableId>> groupSourceTablesByRouteRule(Set<TableId> tableIdSet) {
+        if (routes.isEmpty()) {
+            return new ArrayList<>();
+        }
+        return routes.stream()
+                .map(
+                        route ->
+                                tableIdSet.stream()
+                                        .filter(tableId -> matches(route.f0, tableId))
+                                        .collect(Collectors.toSet()))
+                .collect(Collectors.toList());
+    }
+
+    /** Tells whether the pattern matches the whole string form of the table ID. */
+    private static boolean matches(Pattern pattern, TableId tableId) {
+        return pattern.matcher(tableId.toString()).matches();
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/schema/Selectors.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/schema/Selectors.java
new file mode 100644
index 00000000000..21bc1c2ee21
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/schema/Selectors.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.schema;
+
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.utils.Predicates;
+
+import org.apache.flink.shaded.guava33.com.google.common.cache.Cache;
+import org.apache.flink.shaded.guava33.com.google.common.cache.CacheBuilder;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Predicate;
+
+/** Selectors for filtering tables. */
+public class Selectors {
+
+    private static final Duration CACHE_EXPIRE_DURATION = Duration.ofHours(1);
+
+    /** Inclusion rules; a table is selected when any one of them accepts it. */
+    private List<Selector> selectors;
+
+    /** Match results per table ID, capped at 1024 entries, expiring one hour after last access. */
+    private final Cache<TableId, Boolean> cache =
+            CacheBuilder.newBuilder()
+                    .expireAfterAccess(CACHE_EXPIRE_DURATION)
+                    .maximumSize(1024)
+                    .build();
+
+    private Selectors() {}
+
+    /**
+     * A {@link Selector} that determines whether a table identified by a given {@link TableId} is
+     * to be included.
+     */
+    private static class Selector {
+        private final Predicate<String> namespacePred;
+        private final Predicate<String> schemaNamePred;
+        private final Predicate<String> tableNamePred;
+
+        /**
+         * Builds one predicate per name part; a {@code null} part yields a predicate that rejects
+         * every value.
+         */
+        public Selector(String namespace, String schemaName, String tableName) {
+            this.namespacePred =
+                    namespace == null ? ignored -> false : Predicates.includes(namespace);
+            this.schemaNamePred =
+                    schemaName == null ? ignored -> false : Predicates.includes(schemaName);
+            this.tableNamePred =
+                    tableName == null ? ignored -> false : Predicates.includes(tableName);
+        }
+
+        /**
+         * Tests only the name parts the table ID actually carries: the table name alone when
+         * namespace and schema are both absent, schema and table when only the namespace is
+         * absent, and all three parts otherwise.
+         */
+        public boolean isMatch(TableId tableId) {
+
+            String namespace = tableId.getNamespace();
+            String schemaName = tableId.getSchemaName();
+
+            if (namespace == null || namespace.isEmpty()) {
+                if (schemaName == null || schemaName.isEmpty()) {
+                    return tableNamePred.test(tableId.getTableName());
+                }
+                return schemaNamePred.test(tableId.getSchemaName())
+                        && tableNamePred.test(tableId.getTableName());
+            }
+            return namespacePred.test(tableId.getNamespace())
+                    && schemaNamePred.test(tableId.getSchemaName())
+                    && tableNamePred.test(tableId.getTableName());
+        }
+    }
+
+    /** Match the {@link TableId} against the {@link Selector}s. */
+    public boolean isMatch(TableId tableId) {
+        Boolean cachedResult = cache.getIfPresent(tableId);
+        if (cachedResult != null) {
+            return cachedResult;
+        }
+
+        boolean match = computeIsMatch(tableId);
+        cache.put(tableId, match);
+        return match;
+    }
+
+    /** Computes the match result if not present in the cache. */
+    private boolean computeIsMatch(TableId tableId) {
+        for (Selector selector : selectors) {
+            if (selector.isMatch(tableId)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Builder for {@link Selectors}.
+     *
+     * <p>NOTE(review): if {@link #build()} is called before {@link #includeTables(String)}, the
+     * selector list stays {@code null} and {@link Selectors#isMatch(TableId)} will throw a {@link
+     * NullPointerException} - confirm whether callers rely on always setting inclusions first.
+     */
+    public static class SelectorsBuilder {
+
+        private List<Selector> selectors;
+
+        /**
+         * Parses a table inclusion list into selectors, replacing any previously set ones.
+         *
+         * <p>The list is split by comma and each entry by dot; an entry with one, two or three
+         * parts becomes a table, schema + table, or namespace + schema + table selector.
+         *
+         * @param tableInclusions the inclusion expressions
+         * @return this builder
+         * @throws IllegalArgumentException if the input is null or empty, or an entry does not
+         *     split into one to three parts
+         */
+        public SelectorsBuilder includeTables(String tableInclusions) {
+
+            if (tableInclusions == null || tableInclusions.isEmpty()) {
+                throw new IllegalArgumentException(
+                        "Invalid table inclusion pattern cannot be null or empty");
+            }
+
+            List<Selector> parsedSelectors = new ArrayList<>();
+            Set<String> tableSplitSet =
+                    Predicates.setOf(
+                            tableInclusions, Predicates.RegExSplitterByComma::split, (str) -> str);
+            for (String tableSplit : tableSplitSet) {
+                List<String> tableIdList =
+                        Predicates.listOf(
+                                tableSplit, Predicates.RegExSplitterByDot::split, (str) -> str);
+                Iterator<String> iterator = tableIdList.iterator();
+                if (tableIdList.size() == 1) {
+                    parsedSelectors.add(new Selector(null, null, iterator.next()));
+                } else if (tableIdList.size() == 2) {
+                    parsedSelectors.add(new Selector(null, iterator.next(), iterator.next()));
+                } else if (tableIdList.size() == 3) {
+                    parsedSelectors.add(
+                            new Selector(iterator.next(), iterator.next(), iterator.next()));
+                } else {
+                    throw new IllegalArgumentException(
+                            "Invalid table inclusion pattern: " + tableInclusions);
+                }
+            }
+            this.selectors = parsedSelectors;
+            return this;
+        }
+
+        /** Creates a {@link Selectors} that uses the selectors parsed so far. */
+        public Selectors build() {
+            Selectors built = new Selectors();
+            built.selectors = this.selectors;
+            return built;
+        }
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/EventSinkProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/EventSinkProvider.java
new file mode 100644
index 00000000000..5cb8fb6e2bd
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/EventSinkProvider.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.sink;
+
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.streaming.api.functions.sink.legacy.SinkFunction;
+
+/**
+ * A marker interface used to provide an event sink for writing change events to external systems.
+ * We can reuse existing Flink {@link Sink} and Flink {@link SinkFunction} implementations, and we
+ * can support our own {@code EventSink} implementation in the future.
+ */
+@PublicEvolving
+public interface EventSinkProvider {}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkFunctionProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkFunctionProvider.java
new file mode 100644
index 00000000000..8fcc11fe55f
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkFunctionProvider.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.sink;
+
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.event.Event;
+import org.apache.flink.streaming.api.functions.sink.legacy.SinkFunction;
+
+/**
+ * {@code FlinkSinkFunctionProvider} is used to provide a Flink {@link SinkFunction} for writing
+ * events to external systems.
+ */
+@PublicEvolving
+public interface FlinkSinkFunctionProvider extends EventSinkProvider {
+
+    /**
+     * Get the {@link SinkFunction} for writing events to external systems.
+     *
+     * @return the sink function consuming {@link Event}s
+     */
+    SinkFunction<Event> getSinkFunction();
+
+    /**
+     * Create a {@link FlinkSinkFunctionProvider} from a {@link SinkFunction}.
+     *
+     * @param sinkFunction the function that {@link #getSinkFunction()} of the new provider returns
+     * @return a provider wrapping {@code sinkFunction}
+     */
+    static FlinkSinkFunctionProvider of(SinkFunction<Event> sinkFunction) {
+        return () -> sinkFunction;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkProvider.java
new file mode 100644
index 00000000000..9daac630a4a
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/sink/FlinkSinkProvider.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.sink;
+
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.event.Event;
+
+/**
+ * {@code FlinkSinkProvider} is used to provide a Flink {@link Sink} for writing events to external
+ * systems.
+ */
+@PublicEvolving
+public interface FlinkSinkProvider extends EventSinkProvider {
+
+    /**
+     * Get the {@link Sink} for writing events to external systems.
+     *
+     * @return the sink consuming {@link Event}s
+     */
+    Sink<Event> getSink();
+
+    /**
+     * Create a {@link FlinkSinkProvider} from a {@link Sink}.
+     *
+     * @param sink the sink that {@link #getSink()} of the new provider returns
+     * @return a provider wrapping {@code sink}
+     */
+    static FlinkSinkProvider of(Sink<Event> sink) {
+        return () -> sink;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/EventSourceProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/EventSourceProvider.java
new file mode 100644
index 00000000000..9ec4a8a1a5b
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/EventSourceProvider.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.source;
+
+import org.apache.flink.api.connector.source.Source;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.streaming.api.functions.source.legacy.SourceFunction;
+
+/**
+ * A marker interface used to provide an event source for reading events from external systems. We
+ * can reuse existing Flink {@link Source} and Flink {@link SourceFunction} implementations, and we
+ * can support our own {@code EventSource} implementation in the future.
+ */
+@PublicEvolving
+public interface EventSourceProvider {}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceFunctionProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceFunctionProvider.java
new file mode 100644
index 00000000000..a78f2e1f94f
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceFunctionProvider.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.source;
+
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.event.Event;
+import org.apache.flink.streaming.api.functions.source.legacy.SourceFunction;
+
+/**
+ * {@code FlinkSourceFunctionProvider} is used to provide a Flink {@link SourceFunction} for reading
+ * events from external systems.
+ */
+@PublicEvolving
+public interface FlinkSourceFunctionProvider extends EventSourceProvider {
+
+    /**
+     * Get the {@link SourceFunction} for reading events from external systems.
+     *
+     * @return the source function emitting {@link Event}s
+     */
+    SourceFunction<Event> getSourceFunction();
+
+    /**
+     * Create a {@link FlinkSourceFunctionProvider} from a {@link SourceFunction}.
+     *
+     * @param sourceFunction the function that {@link #getSourceFunction()} of the new provider
+     *     returns
+     * @return a provider wrapping {@code sourceFunction}
+     */
+    static FlinkSourceFunctionProvider of(SourceFunction<Event> sourceFunction) {
+        return () -> sourceFunction;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceProvider.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceProvider.java
new file mode 100644
index 00000000000..6a3e35f0be9
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/source/FlinkSourceProvider.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.source;
+
+import org.apache.flink.api.connector.source.Source;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.event.Event;
+
+/**
+ * {@code FlinkSourceProvider} is used to provide a Flink {@link Source} for reading events from
+ * external systems.
+ */
+@PublicEvolving
+public interface FlinkSourceProvider extends EventSourceProvider {
+
+    /**
+     * Get the {@link Source} for reading events from external systems.
+     *
+     * @return the source emitting {@link Event}s; its split and checkpoint types are left open
+     */
+    Source<Event, ?, ?> getSource();
+
+    /**
+     * Create a {@link FlinkSourceProvider} from a {@link Source}.
+     *
+     * @param source the source that {@link #getSource()} of the new provider returns
+     * @return a provider wrapping {@code source}
+     */
+    static FlinkSourceProvider of(Source<Event, ?, ?> source) {
+        return () -> source;
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/DataField.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/DataField.java
new file mode 100644
index 00000000000..68e2d17eba7
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/DataField.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types;
+
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.types.utils.DataTypeUtils;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.table.types.utils.LogicalTypeDataTypeConverter;
+
+import javax.annotation.Nullable;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+import static org.apache.flink.cdc.common.utils.EncodingUtils.escapeIdentifier;
+import static org.apache.flink.cdc.common.utils.EncodingUtils.escapeSingleQuotes;
+
+/**
+ * Defines the field of a row type.
+ *
+ * @see RowType
+ */
+@PublicEvolving
+public class DataField implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final String FIELD_FORMAT_WITH_DESCRIPTION = "%s %s '%s'";
+
+    public static final String FIELD_FORMAT_NO_DESCRIPTION = "%s %s";
+
+    private final String name;
+
+    private final DataType type;
+
+    private final @Nullable String description;
+
+    /** Creates a field without a description. */
+    public DataField(String name, DataType type) {
+        this(name, type, null);
+    }
+
+    /** Creates a field; name and type are mandatory, the description may be {@code null}. */
+    public DataField(String name, DataType type, @Nullable String description) {
+        this.name = Preconditions.checkNotNull(name, "Field name must not be null.");
+        this.type = Preconditions.checkNotNull(type, "Field type must not be null.");
+        this.description = description;
+    }
+
+    /** Converts a Flink row field into a {@link DataField}, carrying over its description. */
+    public static DataField fromFlinkDataTypeField(
+            org.apache.flink.table.types.logical.RowType.RowField rowField) {
+        final String fieldName = rowField.getName();
+        return DataTypes.FIELD(
+                fieldName,
+                DataTypeUtils.fromFlinkDataType(
+                        LogicalTypeDataTypeConverter.toDataType(rowField.getType())),
+                rowField.getDescription().orElse(null));
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public DataType getType() {
+        return type;
+    }
+
+    @Nullable
+    public String getDescription() {
+        return description;
+    }
+
+    /** Returns a new field with the same name and description and a copy of the type. */
+    public DataField copy() {
+        return new DataField(name, type.copy(), description);
+    }
+
+    /** Renders the field with the type's summary string; a description is shown as "...". */
+    public String asSummaryString() {
+        return formatString(type.asSummaryString(), true);
+    }
+
+    /** Renders the field with the type's serializable string and the escaped description. */
+    public String asSerializableString() {
+        return formatString(type.asSerializableString(), false);
+    }
+
+    /** Converts this field into a Flink table field, passing the description on when present. */
+    public org.apache.flink.table.api.DataTypes.Field toFlinkDataTypeField() {
+        if (description == null) {
+            return org.apache.flink.table.api.DataTypes.FIELD(
+                    name, DataTypeUtils.toFlinkDataType(type));
+        }
+        return org.apache.flink.table.api.DataTypes.FIELD(
+                name, DataTypeUtils.toFlinkDataType(type), description);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null || o.getClass() != getClass()) {
+            return false;
+        }
+        final DataField that = (DataField) o;
+        if (!name.equals(that.name)) {
+            return false;
+        }
+        if (!type.equals(that.type)) {
+            return false;
+        }
+        return Objects.equals(description, that.description);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(name, type, description);
+    }
+
+    /**
+     * Formats the escaped name and the given type string, followed by the quoted description if
+     * there is one. With {@code excludeDescription} the description text is replaced by "...".
+     */
+    private String formatString(String typeString, boolean excludeDescription) {
+        final String escapedName = escapeIdentifier(name);
+        if (description == null) {
+            return String.format(FIELD_FORMAT_NO_DESCRIPTION, escapedName, typeString);
+        }
+        final String shownDescription;
+        if (excludeDescription) {
+            shownDescription = "...";
+        } else {
+            shownDescription = escapeSingleQuotes(description);
+        }
+        return String.format(
+                FIELD_FORMAT_WITH_DESCRIPTION, escapedName, typeString, shownDescription);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/DataTypeUtils.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/DataTypeUtils.java
new file mode 100644
index 00000000000..e712f838400
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/DataTypeUtils.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.utils;
+
+import org.apache.flink.cdc.common.data.ArrayData;
+import org.apache.flink.cdc.common.data.DateData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.MapData;
+import org.apache.flink.cdc.common.data.RecordData;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.data.TimeData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.data.ZonedTimestampData;
+import org.apache.flink.cdc.common.types.DataField;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.DataTypes;
+import org.apache.flink.cdc.common.types.RowType;
+import org.apache.flink.cdc.common.utils.Preconditions;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.CollectionUtil;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getLength;
+import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getPrecision;
+import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getScale;
+
+/** Utilities for handling {@link DataType}s. */
+public class DataTypeUtils {
+ /**
+ * Returns the conversion class for the given {@link DataType} that is used by the table runtime
+ * as internal data structure.
+ */
+ public static Class> toInternalConversionClass(DataType type) {
+ // ordered by type root definition
+ switch (type.getTypeRoot()) {
+ case CHAR:
+ case VARCHAR:
+ return StringData.class;
+ case BOOLEAN:
+ return Boolean.class;
+ case BINARY:
+ case VARBINARY:
+ return byte[].class;
+ case DECIMAL:
+ return DecimalData.class;
+ case TINYINT:
+ return Byte.class;
+ case SMALLINT:
+ return Short.class;
+ case INTEGER:
+ return Integer.class;
+ case DATE:
+ return DateData.class;
+ case TIME_WITHOUT_TIME_ZONE:
+ return TimeData.class;
+ case BIGINT:
+ return Long.class;
+ case FLOAT:
+ return Float.class;
+ case DOUBLE:
+ return Double.class;
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ return TimestampData.class;
+ case TIMESTAMP_WITH_TIME_ZONE:
+ return ZonedTimestampData.class;
+ case ARRAY:
+ return ArrayData.class;
+ case MAP:
+ return MapData.class;
+ case ROW:
+ return RecordData.class;
+ default:
+ throw new IllegalArgumentException("Illegal type: " + type);
+ }
+ }
+
+ /**
+ * Convert CDC's {@link DataType} to Flink's internal {@link
+ * org.apache.flink.table.types.DataType}.
+ */
+ public static org.apache.flink.table.types.DataType toFlinkDataType(DataType type) {
+ // ordered by type root definition
+ List children = type.getChildren();
+ int length = DataTypes.getLength(type).orElse(0);
+ int precision = DataTypes.getPrecision(type).orElse(0);
+ int scale = DataTypes.getScale(type).orElse(0);
+ switch (type.getTypeRoot()) {
+ case CHAR:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.CHAR(length)
+ : org.apache.flink.table.api.DataTypes.CHAR(length).notNull();
+ case VARCHAR:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.VARCHAR(length)
+ : org.apache.flink.table.api.DataTypes.VARCHAR(length).notNull();
+ case BOOLEAN:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.BOOLEAN()
+ : org.apache.flink.table.api.DataTypes.BOOLEAN().notNull();
+ case BINARY:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.BINARY(length)
+ : org.apache.flink.table.api.DataTypes.BINARY(length).notNull();
+ case VARBINARY:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.VARBINARY(length)
+ : org.apache.flink.table.api.DataTypes.VARBINARY(length).notNull();
+ case DECIMAL:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.DECIMAL(precision, scale)
+ : org.apache.flink.table.api.DataTypes.DECIMAL(precision, scale).notNull();
+ case TINYINT:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.TINYINT()
+ : org.apache.flink.table.api.DataTypes.TINYINT().notNull();
+ case SMALLINT:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.SMALLINT()
+ : org.apache.flink.table.api.DataTypes.SMALLINT().notNull();
+ case INTEGER:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.INT()
+ : org.apache.flink.table.api.DataTypes.INT().notNull();
+ case DATE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.DATE()
+ : org.apache.flink.table.api.DataTypes.DATE().notNull();
+ case TIME_WITHOUT_TIME_ZONE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.TIME(precision)
+ : org.apache.flink.table.api.DataTypes.TIME(precision).notNull();
+ case BIGINT:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.BIGINT()
+ : org.apache.flink.table.api.DataTypes.BIGINT().notNull();
+ case FLOAT:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.FLOAT()
+ : org.apache.flink.table.api.DataTypes.FLOAT().notNull();
+ case DOUBLE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.DOUBLE()
+ : org.apache.flink.table.api.DataTypes.DOUBLE().notNull();
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.TIMESTAMP(precision)
+ : org.apache.flink.table.api.DataTypes.TIMESTAMP(precision).notNull();
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(
+ precision)
+ : org.apache.flink.table.api.DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(
+ precision)
+ .notNull();
+ case TIMESTAMP_WITH_TIME_ZONE:
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.TIMESTAMP_WITH_TIME_ZONE(precision)
+ : org.apache.flink.table.api.DataTypes.TIMESTAMP_WITH_TIME_ZONE(precision)
+ .notNull();
+ case ARRAY:
+ Preconditions.checkState(children != null && !children.isEmpty());
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.ARRAY(
+ toFlinkDataType(children.get(0)))
+ : org.apache.flink.table.api.DataTypes.ARRAY(
+ toFlinkDataType(children.get(0)))
+ .notNull();
+ case MAP:
+ Preconditions.checkState(children != null && children.size() > 1);
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.MAP(
+ toFlinkDataType(children.get(0)), toFlinkDataType(children.get(1)))
+ : org.apache.flink.table.api.DataTypes.MAP(
+ toFlinkDataType(children.get(0)),
+ toFlinkDataType(children.get(1)))
+ .notNull();
+ case ROW:
+ Preconditions.checkState(!CollectionUtil.isNullOrEmpty(children));
+ RowType rowType = (RowType) type;
+ List fields =
+ rowType.getFields().stream()
+ .map(DataField::toFlinkDataTypeField)
+ .collect(Collectors.toList());
+ return type.isNullable()
+ ? org.apache.flink.table.api.DataTypes.ROW(fields)
+ : org.apache.flink.table.api.DataTypes.ROW(fields).notNull();
+ default:
+ throw new IllegalArgumentException("Illegal type: " + type);
+ }
+ }
+
+ /**
+ * Convert Flink's internal {@link org.apache.flink.table.types.DataType} to CDC's {@link
+ * DataType}.
+ */
+ public static DataType fromFlinkDataType(org.apache.flink.table.types.DataType flinkType) {
+ LogicalType logicalType = flinkType.getLogicalType();
+ List children = flinkType.getChildren();
+ DataType dataType;
+ switch (logicalType.getTypeRoot()) {
+ case CHAR:
+ dataType = DataTypes.CHAR(getLength(logicalType));
+ break;
+ case VARCHAR:
+ dataType = DataTypes.VARCHAR(getLength(logicalType));
+ break;
+ case BOOLEAN:
+ dataType = DataTypes.BOOLEAN();
+ break;
+ case BINARY:
+ dataType = DataTypes.BINARY(getLength(logicalType));
+ break;
+ case VARBINARY:
+ dataType = DataTypes.VARBINARY(getLength(logicalType));
+ break;
+ case DECIMAL:
+ dataType = DataTypes.DECIMAL(getPrecision(logicalType), getScale(logicalType));
+ break;
+ case TINYINT:
+ dataType = DataTypes.TINYINT();
+ break;
+ case SMALLINT:
+ dataType = DataTypes.SMALLINT();
+ break;
+ case INTEGER:
+ dataType = DataTypes.INT();
+ break;
+ case BIGINT:
+ dataType = DataTypes.BIGINT();
+ break;
+ case FLOAT:
+ dataType = DataTypes.FLOAT();
+ break;
+ case DOUBLE:
+ dataType = DataTypes.DOUBLE();
+ break;
+ case DATE:
+ dataType = DataTypes.DATE();
+ break;
+ case TIME_WITHOUT_TIME_ZONE:
+ dataType = DataTypes.TIME(getPrecision(logicalType));
+ break;
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ dataType = DataTypes.TIMESTAMP(getPrecision(logicalType));
+ break;
+ case TIMESTAMP_WITH_TIME_ZONE:
+ dataType = DataTypes.TIMESTAMP_TZ(getPrecision(logicalType));
+ break;
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ dataType = DataTypes.TIMESTAMP_LTZ(getPrecision(logicalType));
+ break;
+ case ARRAY:
+ Preconditions.checkState(children != null && !children.isEmpty());
+ dataType = DataTypes.ARRAY(fromFlinkDataType(children.get(0)));
+ break;
+ case MAP:
+ Preconditions.checkState(children != null && children.size() > 1);
+ dataType =
+ DataTypes.MAP(
+ fromFlinkDataType(children.get(0)),
+ fromFlinkDataType(children.get(1)));
+ break;
+ case ROW:
+ Preconditions.checkState(!CollectionUtil.isNullOrEmpty(children));
+ org.apache.flink.table.types.logical.RowType rowType =
+ (org.apache.flink.table.types.logical.RowType) flinkType.getLogicalType();
+ DataField[] fields =
+ rowType.getFields().stream()
+ .map(DataField::fromFlinkDataTypeField)
+ .toArray(DataField[]::new);
+ dataType = DataTypes.ROW(fields);
+ break;
+ case INTERVAL_YEAR_MONTH:
+ case INTERVAL_DAY_TIME:
+ case NULL:
+ case MULTISET:
+ case DISTINCT_TYPE:
+ case STRUCTURED_TYPE:
+ case RAW:
+ case SYMBOL:
+ case UNRESOLVED:
+ throw new IllegalArgumentException("Unsupported type: " + flinkType);
+ default:
+ throw new IllegalArgumentException("Illegal type: " + flinkType);
+ }
+ return logicalType.isNullable() ? dataType : dataType.notNull();
+ }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataInputViewStream.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataInputViewStream.java
new file mode 100644
index 00000000000..8fecafba71a
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataInputViewStream.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.utils.runtime;
+
+import org.apache.flink.core.memory.DataInputView;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/** An input stream that draws its data from a {@link DataInputView}. */
+public class DataInputViewStream extends InputStream {
+
+    /** The view all reads and skips are delegated to. */
+    protected DataInputView inputView;
+
+    /**
+     * Creates a stream reading from the given view.
+     *
+     * @param inputView the view to read from
+     */
+    public DataInputViewStream(DataInputView inputView) {
+        this.inputView = inputView;
+    }
+
+    /** Returns the underlying view. */
+    public DataInputView getInputView() {
+        return this.inputView;
+    }
+
+    /**
+     * Reads a single unsigned byte from the view.
+     *
+     * @return the byte read, or {@code -1} if the view signalled the end of input with an {@link
+     *     EOFException}
+     */
+    @Override
+    public int read() throws IOException {
+        final int next;
+        try {
+            next = inputView.readUnsignedByte();
+        } catch (EOFException endOfInput) {
+            // The InputStream contract reports end of input as -1 rather than as an exception.
+            return -1;
+        }
+        return next;
+    }
+
+    /**
+     * Skips up to {@code n} bytes. The view only accepts {@code int} counts, so requests larger
+     * than {@link Integer#MAX_VALUE} are issued in several steps.
+     *
+     * @param n number of bytes to skip
+     * @return number of bytes actually skipped
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        long remaining = n;
+        while (remaining > Integer.MAX_VALUE) {
+            final int skippedNow = inputView.skipBytes(Integer.MAX_VALUE);
+            if (skippedNow == 0) {
+                // No progress was made: report what has been skipped so far.
+                return n - remaining;
+            }
+            remaining -= skippedNow;
+        }
+        // The remainder now fits into an int.
+        final int lastSkipped = inputView.skipBytes((int) remaining);
+        remaining -= lastSkipped;
+        return n - remaining;
+    }
+
+    /** Reads up to {@code len} bytes into {@code b} at {@code off} by delegating to the view. */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        return this.inputView.read(b, off, len);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataOutputViewStream.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataOutputViewStream.java
new file mode 100644
index 00000000000..5798ceb123a
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/utils/runtime/DataOutputViewStream.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.utils.runtime;
+
+import org.apache.flink.core.memory.DataOutputView;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/** An output stream that writes its data to a {@link DataOutputView}. */
+public class DataOutputViewStream extends OutputStream {
+
+    /** The view all writes are delegated to. */
+    protected DataOutputView outputView;
+
+    /**
+     * Creates a stream writing to the given view.
+     *
+     * @param outputView the view to write to
+     */
+    public DataOutputViewStream(DataOutputView outputView) {
+        this.outputView = outputView;
+    }
+
+    /** Writes a single byte by delegating to the view's {@code writeByte}. */
+    @Override
+    public void write(int b) throws IOException {
+        this.outputView.writeByte(b);
+    }
+
+    /** Writes {@code len} bytes of {@code b}, starting at {@code off}, to the view. */
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        this.outputView.write(b, off, len);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariant.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariant.java
new file mode 100644
index 00000000000..5aab5369755
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariant.java
@@ -0,0 +1,500 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.variant;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonFactory;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator;
+
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoUnit;
+import java.util.Arrays;
+import java.util.Base64;
+import java.util.Objects;
+
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.BINARY_SEARCH_THRESHOLD;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.SIZE_LIMIT;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.TIMESTAMP_FORMATTER;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.TIMESTAMP_LTZ_FORMATTER;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.VERSION;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.VERSION_MASK;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.checkIndex;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.getMetadataKey;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.handleArray;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.handleObject;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.malformedVariant;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.readUnsigned;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.unexpectedType;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.valueSize;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.variantConstructorSizeLimit;
+
+/**
+ * Copy from BinaryVariant.java.
+ *
+ * <p>A data structure that represents a semi-structured value. It consists of two binary values:
+ * value and metadata. The value encodes types and values, but not field names. The metadata
+ * currently contains a version flag and a list of field names. We can extend/modify the detailed
+ * binary format given the version flag.
+ *
+ * <p>See the "Variant Binary Encoding" specification for the detail layout of the data structure.
+ *
+ * @see Variant
+ */
+@Internal
+public final class BinaryVariant implements Variant {
+
+    // Shared by all escapeJson calls so that a new factory is not allocated per escaped string.
+    private static final JsonFactory JSON_FACTORY = new JsonFactory();
+
+    private final byte[] value;
+    private final byte[] metadata;
+    // The variant value doesn't use the whole `value` binary, but starts from its `pos` index and
+    // spans a size of `valueSize(value, pos)`. This design avoids frequent copies of the value
+    // binary when reading a sub-variant in the array/object element.
+    private final int pos;
+
+    /**
+     * Creates a variant whose value starts at index 0 of {@code value}.
+     *
+     * @param value binary holding the encoded value
+     * @param metadata binary holding the version flag and the field names
+     */
+    public BinaryVariant(byte[] value, byte[] metadata) {
+        this(value, metadata, 0);
+    }
+
+    // Creates a variant whose value starts at index `pos` of `value`. Rejects metadata with a
+    // missing or unknown version and binaries larger than `SIZE_LIMIT`.
+    private BinaryVariant(byte[] value, byte[] metadata, int pos) {
+        this.value = value;
+        this.metadata = metadata;
+        this.pos = pos;
+        // There is currently only one allowed version.
+        if (metadata.length < 1 || (metadata[0] & VERSION_MASK) != VERSION) {
+            throw malformedVariant();
+        }
+        // Don't attempt to use a Variant larger than 16 MiB. We'll never produce one, and it risks
+        // memory instability.
+        if (metadata.length > SIZE_LIMIT || value.length > SIZE_LIMIT) {
+            throw variantConstructorSizeLimit();
+        }
+    }
+
+    @Override
+    public boolean isPrimitive() {
+        return !isArray() && !isObject();
+    }
+
+    @Override
+    public boolean isArray() {
+        return getType() == Type.ARRAY;
+    }
+
+    @Override
+    public boolean isObject() {
+        return getType() == Type.OBJECT;
+    }
+
+    @Override
+    public boolean isNull() {
+        return getType() == Type.NULL;
+    }
+
+    @Override
+    public Type getType() {
+        return BinaryVariantUtil.getType(value, pos);
+    }
+
+    @Override
+    public boolean getBoolean() throws VariantTypeException {
+        checkType(Type.BOOLEAN, getType());
+        return BinaryVariantUtil.getBoolean(value, pos);
+    }
+
+    @Override
+    public byte getByte() throws VariantTypeException {
+        checkType(Type.TINYINT, getType());
+        return (byte) BinaryVariantUtil.getLong(value, pos);
+    }
+
+    @Override
+    public short getShort() throws VariantTypeException {
+        checkType(Type.SMALLINT, getType());
+        return (short) BinaryVariantUtil.getLong(value, pos);
+    }
+
+    @Override
+    public int getInt() throws VariantTypeException {
+        checkType(Type.INT, getType());
+        return (int) BinaryVariantUtil.getLong(value, pos);
+    }
+
+    @Override
+    public long getLong() throws VariantTypeException {
+        checkType(Type.BIGINT, getType());
+        return BinaryVariantUtil.getLong(value, pos);
+    }
+
+    @Override
+    public float getFloat() throws VariantTypeException {
+        checkType(Type.FLOAT, getType());
+        return BinaryVariantUtil.getFloat(value, pos);
+    }
+
+    @Override
+    public BigDecimal getDecimal() throws VariantTypeException {
+        checkType(Type.DECIMAL, getType());
+        return BinaryVariantUtil.getDecimal(value, pos);
+    }
+
+    @Override
+    public double getDouble() throws VariantTypeException {
+        checkType(Type.DOUBLE, getType());
+        return BinaryVariantUtil.getDouble(value, pos);
+    }
+
+    @Override
+    public String getString() throws VariantTypeException {
+        checkType(Type.STRING, getType());
+        return BinaryVariantUtil.getString(value, pos);
+    }
+
+    @Override
+    public LocalDate getDate() throws VariantTypeException {
+        checkType(Type.DATE, getType());
+        // The stored number is interpreted as days since the epoch.
+        return LocalDate.ofEpochDay(BinaryVariantUtil.getLong(value, pos));
+    }
+
+    @Override
+    public LocalDateTime getDateTime() throws VariantTypeException {
+        checkType(Type.TIMESTAMP, getType());
+        // The stored number is interpreted as microseconds since the epoch, rendered at UTC.
+        return microsToInstant(BinaryVariantUtil.getLong(value, pos))
+                .atZone(ZoneOffset.UTC)
+                .toLocalDateTime();
+    }
+
+    @Override
+    public Instant getInstant() throws VariantTypeException {
+        checkType(Type.TIMESTAMP_LTZ, getType());
+        return microsToInstant(BinaryVariantUtil.getLong(value, pos));
+    }
+
+    @Override
+    public byte[] getBytes() throws VariantTypeException {
+        checkType(Type.BYTES, getType());
+        return BinaryVariantUtil.getBinary(value, pos);
+    }
+
+    /**
+     * Returns the primitive value of this variant as a Java object, dispatching on {@link
+     * #getType()}; a NULL variant yields {@code null}.
+     *
+     * @throws VariantTypeException if this variant is not one of the primitive types handled here
+     */
+    @Override
+    public Object get() throws VariantTypeException {
+        switch (getType()) {
+            case NULL:
+                return null;
+            case BOOLEAN:
+                return getBoolean();
+            case TINYINT:
+                return getByte();
+            case SMALLINT:
+                return getShort();
+            case INT:
+                return getInt();
+            case BIGINT:
+                return getLong();
+            case FLOAT:
+                return getFloat();
+            case DOUBLE:
+                return getDouble();
+            case DECIMAL:
+                return getDecimal();
+            case STRING:
+                return getString();
+            case DATE:
+                return getDate();
+            case TIMESTAMP:
+                return getDateTime();
+            case TIMESTAMP_LTZ:
+                return getInstant();
+            case BYTES:
+                return getBytes();
+            default:
+                throw new VariantTypeException(
+                        String.format("Expecting a primitive variant but got %s", getType()));
+        }
+    }
+
+    /**
+     * Returns {@link #get()} cast to the type expected by the caller. The cast is unchecked: a
+     * mismatch only surfaces as a {@link ClassCastException} at the call site.
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public <T> T getAs() throws VariantTypeException {
+        return (T) get();
+    }
+
+    @Override
+    public Variant getElement(int index) throws VariantTypeException {
+        return getElementAtIndex(index);
+    }
+
+    @Override
+    public Variant getField(String fieldName) throws VariantTypeException {
+        return getFieldByKey(fieldName);
+    }
+
+    @Override
+    public String toJson() {
+        StringBuilder sb = new StringBuilder();
+        toJsonImpl(value, metadata, pos, sb, ZoneOffset.UTC);
+        return sb.toString();
+    }
+
+    /**
+     * Returns the bytes of this variant's value. When {@code pos} is 0 the backing array itself is
+     * returned (no copy); otherwise a copy of the {@code valueSize(value, pos)} bytes starting at
+     * {@code pos} is returned.
+     */
+    public byte[] getValue() {
+        if (pos == 0) {
+            return value;
+        }
+        int size = valueSize(value, pos);
+        checkIndex(pos + size - 1, value.length);
+        return Arrays.copyOfRange(value, pos, pos + size);
+    }
+
+    /** Returns the metadata binary (the backing array, no copy). */
+    public byte[] getMetadata() {
+        return metadata;
+    }
+
+    /** Returns the index in the value binary at which this variant starts. */
+    public int getPos() {
+        return pos;
+    }
+
+    // Appends the JSON form of the variant starting at `pos` of `value` to `sb`, recursing into
+    // object fields and array elements. `zoneId` is only used to render TIMESTAMP_LTZ values;
+    // TIMESTAMP values are always rendered at UTC.
+    private static void toJsonImpl(
+            byte[] value, byte[] metadata, int pos, StringBuilder sb, ZoneId zoneId) {
+        switch (BinaryVariantUtil.getType(value, pos)) {
+            case OBJECT:
+                handleObject(
+                        value,
+                        pos,
+                        (size, idSize, offsetSize, idStart, offsetStart, dataStart) -> {
+                            sb.append('{');
+                            for (int i = 0; i < size; ++i) {
+                                int id = readUnsigned(value, idStart + idSize * i, idSize);
+                                int offset =
+                                        readUnsigned(
+                                                value, offsetStart + offsetSize * i, offsetSize);
+                                int elementPos = dataStart + offset;
+                                if (i != 0) {
+                                    sb.append(',');
+                                }
+                                sb.append(escapeJson(getMetadataKey(metadata, id)));
+                                sb.append(':');
+                                toJsonImpl(value, metadata, elementPos, sb, zoneId);
+                            }
+                            sb.append('}');
+                            return null;
+                        });
+                break;
+            case ARRAY:
+                handleArray(
+                        value,
+                        pos,
+                        (size, offsetSize, offsetStart, dataStart) -> {
+                            sb.append('[');
+                            for (int i = 0; i < size; ++i) {
+                                int offset =
+                                        readUnsigned(
+                                                value, offsetStart + offsetSize * i, offsetSize);
+                                int elementPos = dataStart + offset;
+                                if (i != 0) {
+                                    sb.append(',');
+                                }
+                                toJsonImpl(value, metadata, elementPos, sb, zoneId);
+                            }
+                            sb.append(']');
+                            return null;
+                        });
+                break;
+            case NULL:
+                sb.append("null");
+                break;
+            case BOOLEAN:
+                sb.append(BinaryVariantUtil.getBoolean(value, pos));
+                break;
+            case TINYINT:
+            case SMALLINT:
+            case INT:
+            case BIGINT:
+                sb.append(BinaryVariantUtil.getLong(value, pos));
+                break;
+            case STRING:
+                sb.append(escapeJson(BinaryVariantUtil.getString(value, pos)));
+                break;
+            case DOUBLE:
+                sb.append(BinaryVariantUtil.getDouble(value, pos));
+                break;
+            case DECIMAL:
+                sb.append(BinaryVariantUtil.getDecimal(value, pos).toPlainString());
+                break;
+            case DATE:
+                appendQuoted(
+                        sb,
+                        LocalDate.ofEpochDay((int) BinaryVariantUtil.getLong(value, pos))
+                                .toString());
+                break;
+            case TIMESTAMP_LTZ:
+                appendQuoted(
+                        sb,
+                        TIMESTAMP_LTZ_FORMATTER.format(
+                                microsToInstant(BinaryVariantUtil.getLong(value, pos))
+                                        .atZone(zoneId)));
+                break;
+            case TIMESTAMP:
+                appendQuoted(
+                        sb,
+                        TIMESTAMP_FORMATTER.format(
+                                microsToInstant(BinaryVariantUtil.getLong(value, pos))
+                                        .atZone(ZoneOffset.UTC)));
+                break;
+            case FLOAT:
+                sb.append(BinaryVariantUtil.getFloat(value, pos));
+                break;
+            case BYTES:
+                appendQuoted(
+                        sb,
+                        Base64.getEncoder()
+                                .encodeToString(BinaryVariantUtil.getBinary(value, pos)));
+                break;
+            default:
+                throw unexpectedType(BinaryVariantUtil.getType(value, pos));
+        }
+    }
+
+    // Converts a number of microseconds since the epoch into an Instant.
+    private static Instant microsToInstant(long timestamp) {
+        return Instant.EPOCH.plus(timestamp, ChronoUnit.MICROS);
+    }
+
+    // Throws a VariantTypeException unless `actual` is exactly `expected`.
+    private void checkType(Type expected, Type actual) {
+        if (expected != actual) {
+            throw new VariantTypeException(
+                    String.format("Expected type %s but got %s", expected, actual));
+        }
+    }
+
+    // Find the field value whose key is equal to `key`. Return null if the key is not found.
+    // It is only legal to call it when `getType()` is `Type.OBJECT`.
+    // NOTE(review): the binary search branch presumably relies on the object's fields being
+    // stored in key order -- confirm against the code that builds the binary.
+    private BinaryVariant getFieldByKey(String key) {
+        return handleObject(
+                value,
+                pos,
+                (size, idSize, offsetSize, idStart, offsetStart, dataStart) -> {
+                    // Use linear search for a short list. Switch to binary search when the length
+                    // reaches `BINARY_SEARCH_THRESHOLD`.
+                    if (size < BINARY_SEARCH_THRESHOLD) {
+                        for (int i = 0; i < size; ++i) {
+                            int id = readUnsigned(value, idStart + idSize * i, idSize);
+                            if (key.equals(getMetadataKey(metadata, id))) {
+                                int offset =
+                                        readUnsigned(
+                                                value, offsetStart + offsetSize * i, offsetSize);
+                                return new BinaryVariant(value, metadata, dataStart + offset);
+                            }
+                        }
+                    } else {
+                        int low = 0;
+                        int high = size - 1;
+                        while (low <= high) {
+                            // Use unsigned right shift to compute the middle of `low` and `high`.
+                            // This is not only a performance optimization, because it can properly
+                            // handle the case where `low + high` overflows int.
+                            int mid = (low + high) >>> 1;
+                            int id = readUnsigned(value, idStart + idSize * mid, idSize);
+                            int cmp = getMetadataKey(metadata, id).compareTo(key);
+                            if (cmp < 0) {
+                                low = mid + 1;
+                            } else if (cmp > 0) {
+                                high = mid - 1;
+                            } else {
+                                int offset =
+                                        readUnsigned(
+                                                value, offsetStart + offsetSize * mid, offsetSize);
+                                return new BinaryVariant(value, metadata, dataStart + offset);
+                            }
+                        }
+                    }
+                    return null;
+                });
+    }
+
+    // Get the number of array elements in the variant.
+    // It is only legal to call it when `getType()` is `Type.ARRAY`.
+    @Override
+    public int arraySize() {
+        return handleArray(value, pos, (size, offsetSize, offsetStart, dataStart) -> size);
+    }
+
+    // Get the array element at the `index` slot. Return null if `index` is out of the bound of
+    // `[0, arraySize())`.
+    // It is only legal to call it when `getType()` is `Type.ARRAY`.
+    private BinaryVariant getElementAtIndex(int index) {
+        return handleArray(
+                value,
+                pos,
+                (size, offsetSize, offsetStart, dataStart) -> {
+                    if (index < 0 || index >= size) {
+                        return null;
+                    }
+                    int offset = readUnsigned(value, offsetStart + offsetSize * index, offsetSize);
+                    return new BinaryVariant(value, metadata, dataStart + offset);
+                });
+    }
+
+    // Escape a string so that it can be pasted into JSON structure.
+    // For example, if `str` only contains a new-line character, then the result content is "\n"
+    // (4 characters).
+    private static String escapeJson(String str) {
+        try (CharArrayWriter writer = new CharArrayWriter();
+                JsonGenerator gen = JSON_FACTORY.createGenerator(writer)) {
+            gen.writeString(str);
+            // Flush before reading the writer, since the generator is only closed after return.
+            gen.flush();
+            return writer.toString();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    // Appends `str` wrapped in double quotes, without any escaping.
+    private static void appendQuoted(StringBuilder sb, String str) {
+        sb.append('"');
+        sb.append(str);
+        sb.append('"');
+    }
+
+    @Override
+    public String toString() {
+        return toJson();
+    }
+
+    /**
+     * Two binary variants are equal when they have the same position, equal value bytes as
+     * returned by {@link #getValue()} and equal metadata bytes.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof BinaryVariant)) {
+            return false;
+        }
+        BinaryVariant variant = (BinaryVariant) o;
+        return getPos() == variant.getPos()
+                && Arrays.equals(getValue(), variant.getValue())
+                && Arrays.equals(getMetadata(), variant.getMetadata());
+    }
+
+    /**
+     * Hashes exactly what {@link #equals(Object)} compares. The value bytes are taken from {@link
+     * #getValue()} rather than from the whole backing array: for {@code pos != 0}, two equal
+     * sub-variants may live in backing arrays that differ outside their own range, and hashing
+     * the whole array would give equal objects different hash codes.
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hash(Arrays.hashCode(getValue()), Arrays.hashCode(metadata), pos);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilder.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilder.java
new file mode 100644
index 00000000000..b53c94bbe1f
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilder.java
@@ -0,0 +1,657 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.variant;
+
+import org.apache.flink.cdc.common.annotation.Internal;
+
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonFactory;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParseException;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonToken;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.exc.InputCoercionException;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.ARRAY;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.BASIC_TYPE_MASK;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.BINARY;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.DATE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.DECIMAL16;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.DECIMAL4;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.DECIMAL8;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.DOUBLE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.FALSE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.FLOAT;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.INT1;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.INT2;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.INT4;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.INT8;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.LONG_STR;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.MAX_DECIMAL16_PRECISION;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.MAX_DECIMAL4_PRECISION;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.MAX_DECIMAL8_PRECISION;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.MAX_SHORT_STR_SIZE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.NULL;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.OBJECT;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.SIZE_LIMIT;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.TIMESTAMP;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.TIMESTAMP_LTZ;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.TRUE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.U16_MAX;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.U24_MAX;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.U24_SIZE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.U32_SIZE;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.U8_MAX;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.VERSION;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.arrayHeader;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.checkIndex;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.getMetadataKey;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.handleArray;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.handleObject;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.objectHeader;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.primitiveHeader;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.readUnsigned;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.shortStrHeader;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.valueSize;
+import static org.apache.flink.cdc.common.types.variant.BinaryVariantUtil.writeLong;
+
+/**
+ * Copied from BinaryVariantBuilder.java.
+ *
+ * <p>The internal builder for {@link BinaryVariant}.
+ */
+@Internal
+public class BinaryVariantInternalBuilder {
+
+ // Pre-allocated exception instances; the throw sites in this class all throw these same
+ // objects rather than creating a new exception per failure.
+ // NOTE(review): since each instance is created once, its stack trace presumably reflects class
+ // initialization rather than the throw site - confirm against VariantTypeException.
+ public static final VariantTypeException VARIANT_SIZE_LIMIT_EXCEPTION =
+ new VariantTypeException("VARIANT_SIZE_LIMIT");
+ public static final VariantTypeException VARIANT_DUPLICATE_KEY_EXCEPTION =
+ new VariantTypeException("VARIANT_DUPLICATE_KEY");
+
+ /**
+ * Creates an empty builder.
+ *
+ * @param allowDuplicateKeys when true, {@code finishWritingObject} keeps the last-appended
+ * field for a repeated key; when false, a repeated key makes it throw
+ * {@code VARIANT_DUPLICATE_KEY_EXCEPTION}
+ */
+ public BinaryVariantInternalBuilder(boolean allowDuplicateKeys) {
+ this.allowDuplicateKeys = allowDuplicateKeys;
+ }
+
+ /**
+  * Parses a JSON string into a Variant value.
+  *
+  * <p>Only the first JSON value is consumed; tokens after it are not inspected here.
+  *
+  * @param json the JSON text to parse
+  * @param allowDuplicateKeys whether repeated object keys are tolerated (last one wins) or
+  *     rejected
+  * @return the variant built from the first JSON value in {@code json}
+  * @throws IOException if any JSON parsing error happens.
+  */
+ public static BinaryVariant parseJson(String json, boolean allowDuplicateKeys)
+         throws IOException {
+     JsonFactory factory = new JsonFactory();
+     try (JsonParser jsonParser = factory.createParser(json)) {
+         // Advance to the first token; the recursive builder works on the current token.
+         jsonParser.nextToken();
+         return parseJson(jsonParser, allowDuplicateKeys);
+     }
+ }
+
+ /**
+  * Similar to {@link #parseJson(String, boolean)}, but reads from a JSON parser that is already
+  * positioned on the first token of the value to convert.
+  */
+ private static BinaryVariant parseJson(JsonParser parser, boolean allowDuplicateKeys)
+         throws IOException {
+     BinaryVariantInternalBuilder variantBuilder =
+             new BinaryVariantInternalBuilder(allowDuplicateKeys);
+     variantBuilder.buildJson(parser);
+     BinaryVariant result = variantBuilder.build();
+     return result;
+ }
+
+ // Assemble the variant metadata (header byte, key count, offset table, key bytes) from
+ // `dictionaryKeys` and return it together with a copy of the bytes written so far.
+ public BinaryVariant build() {
+     final int keyCount = dictionaryKeys.size();
+     // Accumulate in a long so that summing key lengths cannot overflow.
+     long totalKeyBytes = 0;
+     for (byte[] key : dictionaryKeys) {
+         totalKeyBytes += key.length;
+     }
+     // The offset width must be able to hold the largest offset, which is the one-past-the-end
+     // value (the total key size). The key count is stored with the same width, so it takes
+     // part in the calculation as well in case of pathological data.
+     long largestEntry = Math.max(totalKeyBytes, keyCount);
+     if (largestEntry > SIZE_LIMIT) {
+         throw VARIANT_SIZE_LIMIT_EXCEPTION;
+     }
+     final int offsetSize = getIntegerSize((int) largestEntry);
+
+     final int offsetStart = 1 + offsetSize;
+     final int stringStart = offsetStart + (keyCount + 1) * offsetSize;
+     long metadataSize = stringStart + totalKeyBytes;
+     if (metadataSize > SIZE_LIMIT) {
+         throw VARIANT_SIZE_LIMIT_EXCEPTION;
+     }
+
+     byte[] metadata = new byte[(int) metadataSize];
+     // Header byte: version plus (offsetSize - 1) stored from bit 6 upwards.
+     writeLong(metadata, 0, VERSION | ((offsetSize - 1) << 6), 1);
+     writeLong(metadata, 1, keyCount, offsetSize);
+
+     int offsetSlot = offsetStart;
+     int stringCursor = 0;
+     for (byte[] key : dictionaryKeys) {
+         writeLong(metadata, offsetSlot, stringCursor, offsetSize);
+         System.arraycopy(key, 0, metadata, stringStart + stringCursor, key.length);
+         offsetSlot += offsetSize;
+         stringCursor += key.length;
+     }
+     // Final table entry: the one-past-the-end offset.
+     writeLong(metadata, offsetSlot, stringCursor, offsetSize);
+
+     byte[] valueBytes = Arrays.copyOfRange(writeBuffer, 0, writePos);
+     return new BinaryVariant(valueBytes, metadata);
+ }
+
+ // Append a string as UTF-8. Strings longer than `MAX_SHORT_STR_SIZE` bytes get a LONG_STR
+ // header followed by a `U32_SIZE`-byte length; shorter ones carry the length in the header.
+ public void appendString(String str) {
+     byte[] utf8 = str.getBytes(StandardCharsets.UTF_8);
+     int length = utf8.length;
+     if (length > MAX_SHORT_STR_SIZE) {
+         checkCapacity(1 + U32_SIZE + length);
+         writeBuffer[writePos++] = primitiveHeader(LONG_STR);
+         writeLong(writeBuffer, writePos, length, U32_SIZE);
+         writePos += U32_SIZE;
+     } else {
+         checkCapacity(1 + length);
+         writeBuffer[writePos++] = shortStrHeader(length);
+     }
+     System.arraycopy(utf8, 0, writeBuffer, writePos, length);
+     writePos += length;
+ }
+
+ // Append a null value: a single header byte with no payload.
+ public void appendNull() {
+     checkCapacity(1);
+     writeBuffer[writePos] = primitiveHeader(NULL);
+     writePos += 1;
+ }
+
+ // Append a boolean: a single header byte whose type is TRUE or FALSE, with no payload.
+ public void appendBoolean(boolean b) {
+     checkCapacity(1);
+     if (b) {
+         writeBuffer[writePos++] = primitiveHeader(TRUE);
+     } else {
+         writeBuffer[writePos++] = primitiveHeader(FALSE);
+     }
+ }
+
+ // Append a 1-byte integer: INT1 header followed by one payload byte.
+ public void appendByte(byte b) {
+     final int payloadBytes = 1;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(INT1);
+     writeLong(writeBuffer, writePos + 1, b, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a 2-byte integer: INT2 header followed by two payload bytes.
+ public void appendShort(short s) {
+     final int payloadBytes = 2;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(INT2);
+     writeLong(writeBuffer, writePos + 1, s, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a 4-byte integer: INT4 header followed by four payload bytes.
+ public void appendInt(int i) {
+     final int payloadBytes = 4;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(INT4);
+     writeLong(writeBuffer, writePos + 1, i, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append an 8-byte integer: INT8 header followed by eight payload bytes.
+ public void appendLong(long l) {
+     final int payloadBytes = 8;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(INT8);
+     writeLong(writeBuffer, writePos + 1, l, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append an integer using the narrowest of the 1/2/4/8-byte encodings that can hold it.
+ public void appendNumeric(long l) {
+     if (Byte.MIN_VALUE <= l && l <= Byte.MAX_VALUE) {
+         appendByte((byte) l);
+         return;
+     }
+     if (Short.MIN_VALUE <= l && l <= Short.MAX_VALUE) {
+         appendShort((short) l);
+         return;
+     }
+     if (Integer.MIN_VALUE <= l && l <= Integer.MAX_VALUE) {
+         appendInt((int) l);
+         return;
+     }
+     appendLong(l);
+ }
+
+ // Append a double: DOUBLE header followed by the eight bytes of its IEEE 754 bit pattern.
+ public void appendDouble(double d) {
+     final int payloadBytes = 8;
+     long bits = Double.doubleToLongBits(d);
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(DOUBLE);
+     writeLong(writeBuffer, writePos + 1, bits, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a decimal value using the smallest of the 4/8/16-byte encodings that fits both its
+ // scale and precision. The caller should guarantee that its precision and scale fit into
+ // `MAX_DECIMAL16_PRECISION`; this is only checked by an assertion.
+ public void appendDecimal(BigDecimal d) {
+     // Header byte + scale byte + the widest possible payload.
+     checkCapacity(2 + 16);
+     final int scale = d.scale();
+     final int precision = d.precision();
+     final BigInteger unscaled = d.unscaledValue();
+
+     if (scale <= MAX_DECIMAL4_PRECISION && precision <= MAX_DECIMAL4_PRECISION) {
+         writeBuffer[writePos++] = primitiveHeader(DECIMAL4);
+         writeBuffer[writePos++] = (byte) scale;
+         writeLong(writeBuffer, writePos, unscaled.intValueExact(), 4);
+         writePos += 4;
+         return;
+     }
+     if (scale <= MAX_DECIMAL8_PRECISION && precision <= MAX_DECIMAL8_PRECISION) {
+         writeBuffer[writePos++] = primitiveHeader(DECIMAL8);
+         writeBuffer[writePos++] = (byte) scale;
+         writeLong(writeBuffer, writePos, unscaled.longValueExact(), 8);
+         writePos += 8;
+         return;
+     }
+
+     assert scale <= MAX_DECIMAL16_PRECISION && precision <= MAX_DECIMAL16_PRECISION;
+     writeBuffer[writePos++] = primitiveHeader(DECIMAL16);
+     writeBuffer[writePos++] = (byte) scale;
+     // `toByteArray` is big-endian two's complement. Store it in reverse byte order and then
+     // sign-extend up to 16 bytes.
+     byte[] bigEndian = unscaled.toByteArray();
+     int count = bigEndian.length;
+     for (int i = 0; i < count; ++i) {
+         writeBuffer[writePos + i] = bigEndian[count - 1 - i];
+     }
+     byte padding = (byte) (bigEndian[0] < 0 ? -1 : 0);
+     for (int i = count; i < 16; ++i) {
+         writeBuffer[writePos + i] = padding;
+     }
+     writePos += 16;
+ }
+
+ // Append a date given as days since the epoch: DATE header followed by four payload bytes.
+ public void appendDate(int daysSinceEpoch) {
+     final int payloadBytes = 4;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(DATE);
+     writeLong(writeBuffer, writePos + 1, daysSinceEpoch, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a TIMESTAMP_LTZ value given as microseconds since the epoch: header byte followed
+ // by eight payload bytes.
+ public void appendTimestampLtz(long microsSinceEpoch) {
+     final int payloadBytes = 8;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(TIMESTAMP_LTZ);
+     writeLong(writeBuffer, writePos + 1, microsSinceEpoch, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a TIMESTAMP value given as microseconds since the epoch: header byte followed by
+ // eight payload bytes.
+ public void appendTimestamp(long microsSinceEpoch) {
+     final int payloadBytes = 8;
+     checkCapacity(1 + payloadBytes);
+     writeBuffer[writePos] = primitiveHeader(TIMESTAMP);
+     writeLong(writeBuffer, writePos + 1, microsSinceEpoch, payloadBytes);
+     writePos += 1 + payloadBytes;
+ }
+
+ // Append a float: FLOAT header followed by the four bytes of its IEEE 754 bit pattern.
+ public void appendFloat(float f) {
+     checkCapacity(1 + 4);
+     writeBuffer[writePos++] = primitiveHeader(FLOAT);
+     // Write exactly 4 payload bytes, matching both the reserved capacity and the cursor
+     // advance below. A width of 8 would spill 4 extra bytes past the reserved region and
+     // could run off the end of `writeBuffer`.
+     writeLong(writeBuffer, writePos, Float.floatToIntBits(f), 4);
+     writePos += 4;
+ }
+
+ // Append a binary value: BINARY header, a `U32_SIZE`-byte length, then the raw bytes.
+ public void appendBinary(byte[] binary) {
+     final int length = binary.length;
+     checkCapacity(1 + U32_SIZE + length);
+     writeBuffer[writePos] = primitiveHeader(BINARY);
+     writePos += 1;
+     writeLong(writeBuffer, writePos, length, U32_SIZE);
+     writePos += U32_SIZE;
+     System.arraycopy(binary, 0, writeBuffer, writePos, length);
+     writePos += length;
+ }
+
+ // Return the dictionary id of `key`, registering the key first if it has not been seen yet.
+ // An already-known key leaves the dictionary untouched. New ids are handed out in insertion
+ // order, starting from zero.
+ public int addKey(String key) {
+     if (!dictionary.containsKey(key)) {
+         int freshId = dictionaryKeys.size();
+         dictionary.put(key, freshId);
+         dictionaryKeys.add(key.getBytes(StandardCharsets.UTF_8));
+         return freshId;
+     }
+     return dictionary.get(key);
+ }
+
+ // Return the current write position of the variant builder, i.e. the number of bytes written
+ // to the value buffer so far. It is used together with `finishWritingObject` or
+ // `finishWritingArray`, which take it as their `start` argument.
+ public int getWritePos() {
+ return writePos;
+ }
+
+ // Finish writing a variant object after all of its fields have already been written. The
+ // process is as follows:
+ // 1. The caller calls `getWritePos` before writing any fields to obtain the `start` parameter.
+ // 2. The caller appends all the object fields to the builder. In the meantime, it should
+ //    maintain the `fields` parameter. Before appending each field, it should append an entry
+ //    to `fields` to record the offset of the field. The offset is computed as
+ //    `getWritePos() - start`.
+ // 3. The caller calls `finishWritingObject` to finish writing a variant object.
+ //
+ // This function is responsible to sort the fields by key. If there are duplicate field keys:
+ // - when `allowDuplicateKeys` is true, the field with the greatest offset value (the last
+ //   appended one) is kept.
+ // - otherwise, throw an exception.
+ //
+ // Note that `fields` is sorted, and possibly shrunk, in place.
+ public void finishWritingObject(int start, ArrayList<FieldEntry> fields) {
+     int size = fields.size();
+     Collections.sort(fields);
+     int maxId = size == 0 ? 0 : fields.get(0).id;
+     if (allowDuplicateKeys) {
+         int distinctPos = 0;
+         // Maintain a list of distinct keys in-place.
+         for (int i = 1; i < size; ++i) {
+             maxId = Math.max(maxId, fields.get(i).id);
+             if (fields.get(i).id == fields.get(i - 1).id) {
+                 // Found a duplicate key. Keep the field with a greater offset.
+                 if (fields.get(distinctPos).offset < fields.get(i).offset) {
+                     fields.set(
+                             distinctPos,
+                             fields.get(distinctPos).withNewOffset(fields.get(i).offset));
+                 }
+             } else {
+                 // Found a distinct key. Add the field to the list.
+                 ++distinctPos;
+                 fields.set(distinctPos, fields.get(i));
+             }
+         }
+         if (distinctPos + 1 < fields.size()) {
+             size = distinctPos + 1;
+             // Resize `fields` to `size`.
+             fields.subList(size, fields.size()).clear();
+             // Sort the fields by offsets so that we can move the value data of each field to
+             // the new offset without overwriting the fields after it.
+             fields.sort(Comparator.comparingInt(f -> f.offset));
+             int currentOffset = 0;
+             for (int i = 0; i < size; ++i) {
+                 int oldOffset = fields.get(i).offset;
+                 int fieldSize = valueSize(writeBuffer, start + oldOffset);
+                 System.arraycopy(
+                         writeBuffer,
+                         start + oldOffset,
+                         writeBuffer,
+                         start + currentOffset,
+                         fieldSize);
+                 fields.set(i, fields.get(i).withNewOffset(currentOffset));
+                 currentOffset += fieldSize;
+             }
+             writePos = start + currentOffset;
+             // Change back to the sort order by field keys to meet the variant spec.
+             Collections.sort(fields);
+         }
+     } else {
+         for (int i = 1; i < size; ++i) {
+             maxId = Math.max(maxId, fields.get(i).id);
+             String key = fields.get(i).key;
+             if (key.equals(fields.get(i - 1).key)) {
+                 throw VARIANT_DUPLICATE_KEY_EXCEPTION;
+             }
+         }
+     }
+     int dataSize = writePos - start;
+     boolean largeSize = size > U8_MAX;
+     int sizeBytes = largeSize ? U32_SIZE : 1;
+     int idSize = getIntegerSize(maxId);
+     int offsetSize = getIntegerSize(dataSize);
+     // The space for header byte, object size, id list, and offset list.
+     int headerSize = 1 + sizeBytes + size * idSize + (size + 1) * offsetSize;
+     checkCapacity(headerSize);
+     // Shift the just-written field data to make room for the object header section.
+     System.arraycopy(writeBuffer, start, writeBuffer, start + headerSize, dataSize);
+     writePos += headerSize;
+     writeBuffer[start] = objectHeader(largeSize, idSize, offsetSize);
+     writeLong(writeBuffer, start + 1, size, sizeBytes);
+     int idStart = start + 1 + sizeBytes;
+     int offsetStart = idStart + size * idSize;
+     for (int i = 0; i < size; ++i) {
+         writeLong(writeBuffer, idStart + i * idSize, fields.get(i).id, idSize);
+         writeLong(writeBuffer, offsetStart + i * offsetSize, fields.get(i).offset, offsetSize);
+     }
+     // Trailing offset entry: one past the end of the field data.
+     writeLong(writeBuffer, offsetStart + size * offsetSize, dataSize, offsetSize);
+ }
+
+ // Finish writing a variant array after all of its elements have already been written. The
+ // process is similar to that of `finishWritingObject`: `start` is the write position taken
+ // before the first element was appended, and `offsets` holds, per element, the value of
+ // `getWritePos() - start` recorded just before that element was appended.
+ public void finishWritingArray(int start, ArrayList<Integer> offsets) {
+     int dataSize = writePos - start;
+     int size = offsets.size();
+     boolean largeSize = size > U8_MAX;
+     int sizeBytes = largeSize ? U32_SIZE : 1;
+     int offsetSize = getIntegerSize(dataSize);
+     // The space for header byte, array size, and offset list.
+     int headerSize = 1 + sizeBytes + (size + 1) * offsetSize;
+     checkCapacity(headerSize);
+     // Shift the just-written element data to make room for the header section.
+     System.arraycopy(writeBuffer, start, writeBuffer, start + headerSize, dataSize);
+     writePos += headerSize;
+     writeBuffer[start] = arrayHeader(largeSize, offsetSize);
+     writeLong(writeBuffer, start + 1, size, sizeBytes);
+     int offsetStart = start + 1 + sizeBytes;
+     for (int i = 0; i < size; ++i) {
+         writeLong(writeBuffer, offsetStart + i * offsetSize, offsets.get(i), offsetSize);
+     }
+     // Trailing offset entry: one past the end of the element data.
+     writeLong(writeBuffer, offsetStart + size * offsetSize, dataSize, offsetSize);
+ }
+
+ // Append a variant value to the variant builder. We need to insert the keys in the input
+ // variant into the current variant dictionary and rebuild it with new field ids. For scalar
+ // values in the input variant, we can directly copy the binary slice.
+ public void appendVariant(BinaryVariant v) {
+ appendVariantImpl(v.getValue(), v.getMetadata(), v.getPos());
+ }
+
+ // Recursively copy the variant value located at `value[pos]` into this builder. Objects and
+ // arrays are rebuilt element by element so that object keys are re-registered in this
+ // builder's dictionary (via `addKey`); every other basic type is copied as a raw byte slice.
+ private void appendVariantImpl(byte[] value, byte[] metadata, int pos) {
+     checkIndex(pos, value.length);
+     int basicType = value[pos] & BASIC_TYPE_MASK;
+     switch (basicType) {
+         case OBJECT:
+             handleObject(
+                     value,
+                     pos,
+                     (size, idSize, offsetSize, idStart, offsetStart, dataStart) -> {
+                         ArrayList<FieldEntry> fields = new ArrayList<>(size);
+                         int start = writePos;
+                         for (int i = 0; i < size; ++i) {
+                             int id = readUnsigned(value, idStart + idSize * i, idSize);
+                             int offset =
+                                     readUnsigned(
+                                             value, offsetStart + offsetSize * i, offsetSize);
+                             int elementPos = dataStart + offset;
+                             // Translate the source dictionary id into an id of this builder.
+                             String key = getMetadataKey(metadata, id);
+                             int newId = addKey(key);
+                             fields.add(new FieldEntry(key, newId, writePos - start));
+                             appendVariantImpl(value, metadata, elementPos);
+                         }
+                         finishWritingObject(start, fields);
+                         return null;
+                     });
+             break;
+         case ARRAY:
+             handleArray(
+                     value,
+                     pos,
+                     (size, offsetSize, offsetStart, dataStart) -> {
+                         ArrayList<Integer> offsets = new ArrayList<>(size);
+                         int start = writePos;
+                         for (int i = 0; i < size; ++i) {
+                             int offset =
+                                     readUnsigned(
+                                             value, offsetStart + offsetSize * i, offsetSize);
+                             int elementPos = dataStart + offset;
+                             offsets.add(writePos - start);
+                             appendVariantImpl(value, metadata, elementPos);
+                         }
+                         finishWritingArray(start, offsets);
+                         return null;
+                     });
+             break;
+         default:
+             shallowAppendVariantImpl(value, pos);
+             break;
+     }
+ }
+
+ // Copy the encoded value starting at `value[pos]` verbatim into the write buffer. The length
+ // of the slice is taken from `valueSize`.
+ private void shallowAppendVariantImpl(byte[] value, int pos) {
+     int byteCount = valueSize(value, pos);
+     // The last byte of the slice must still lie inside `value`.
+     int lastByte = pos + byteCount - 1;
+     checkIndex(lastByte, value.length);
+     checkCapacity(byteCount);
+     System.arraycopy(value, pos, writeBuffer, writePos, byteCount);
+     writePos += byteCount;
+ }
+
+ // Ensure that `additional` more bytes can be written at `writePos`, growing `writeBuffer` to
+ // the next power of two of the required size when needed. Throws
+ // `VARIANT_SIZE_LIMIT_EXCEPTION` when the grown buffer would exceed `SIZE_LIMIT`.
+ private void checkCapacity(int additional) {
+     // Use long arithmetic: with int, `writePos + additional` or the doubling below could wrap
+     // around to a negative number and slip past the size-limit check.
+     long required = (long) writePos + additional;
+     if (required > writeBuffer.length) {
+         // Allocate a new buffer with a capacity of the next power of 2 of `required`.
+         long newCapacity = Long.highestOneBit(required);
+         if (newCapacity < required) {
+             newCapacity <<= 1;
+         }
+         if (newCapacity > SIZE_LIMIT) {
+             throw VARIANT_SIZE_LIMIT_EXCEPTION;
+         }
+         byte[] newValue = new byte[(int) newCapacity];
+         System.arraycopy(writeBuffer, 0, newValue, 0, writePos);
+         writeBuffer = newValue;
+     }
+ }
+
+ /**
+  * Temporarily store the information of a field. We need to collect all fields in a JSON
+  * object, sort them by their keys, and build the variant object in sorted order.
+  *
+  * <p>Instances are immutable; the natural ordering compares {@code key} only.
+  */
+ public static final class FieldEntry implements Comparable<FieldEntry> {
+     /** The field name. */
+     final String key;
+     /** The dictionary id assigned to {@link #key}. */
+     final int id;
+     /** Offset of the field value, relative to the start of the enclosing object's data. */
+     final int offset;
+
+     public FieldEntry(String key, int id, int offset) {
+         this.key = key;
+         this.id = id;
+         this.offset = offset;
+     }
+
+     /** Returns a copy of this entry that differs only in its offset. */
+     FieldEntry withNewOffset(int newOffset) {
+         return new FieldEntry(key, id, newOffset);
+     }
+
+     @Override
+     public int compareTo(FieldEntry other) {
+         return key.compareTo(other.key);
+     }
+ }
+
+ // Convert the JSON value at the parser's current token into variant bytes, recursing into
+ // objects and arrays. On return the parser is positioned on the last token of that value.
+ private void buildJson(JsonParser parser) throws IOException {
+     JsonToken token = parser.currentToken();
+     if (token == null) {
+         throw new JsonParseException(parser, "Unexpected null token");
+     }
+     switch (token) {
+         case START_OBJECT:
+             {
+                 ArrayList<FieldEntry> fields = new ArrayList<>();
+                 int start = writePos;
+                 while (parser.nextToken() != JsonToken.END_OBJECT) {
+                     String key = parser.currentName();
+                     // Step from the field name onto the field value.
+                     parser.nextToken();
+                     int id = addKey(key);
+                     fields.add(new FieldEntry(key, id, writePos - start));
+                     buildJson(parser);
+                 }
+                 finishWritingObject(start, fields);
+                 break;
+             }
+         case START_ARRAY:
+             {
+                 ArrayList<Integer> offsets = new ArrayList<>();
+                 int start = writePos;
+                 while (parser.nextToken() != JsonToken.END_ARRAY) {
+                     offsets.add(writePos - start);
+                     buildJson(parser);
+                 }
+                 finishWritingArray(start, offsets);
+                 break;
+             }
+         case VALUE_STRING:
+             appendString(parser.getText());
+             break;
+         case VALUE_NUMBER_INT:
+             try {
+                 appendNumeric(parser.getLongValue());
+             } catch (InputCoercionException ignored) {
+                 // If the value doesn't fit any integer type, parse it as decimal or floating
+                 // instead.
+                 parseFloatingPoint(parser);
+             }
+             break;
+         case VALUE_NUMBER_FLOAT:
+             parseFloatingPoint(parser);
+             break;
+         case VALUE_TRUE:
+             appendBoolean(true);
+             break;
+         case VALUE_FALSE:
+             appendBoolean(false);
+             break;
+         case VALUE_NULL:
+             appendNull();
+             break;
+         default:
+             throw new JsonParseException(parser, "Unexpected token " + token);
+     }
+ }
+
+ // Return the byte width (1, 2 or `U24_SIZE`) of the smallest unsigned integer type that can
+ // store `value`. The value must be within `[0, U24_MAX]`; this is only checked by an assertion.
+ private int getIntegerSize(int value) {
+     assert value >= 0 && value <= U24_MAX;
+     if (value > U16_MAX) {
+         return U24_SIZE;
+     }
+     return value > U8_MAX ? 2 : 1;
+ }
+
+ // Append the parser's current number as a decimal when its text qualifies, otherwise as a
+ // double.
+ private void parseFloatingPoint(JsonParser parser) throws IOException {
+     boolean storedAsDecimal = tryParseDecimal(parser.getText());
+     if (storedAsDecimal) {
+         return;
+     }
+     appendDouble(parser.getDoubleValue());
+ }
+
+ // Try to append a JSON number as a decimal and report whether that happened. The input
+ // qualifies only when it is written in plain decimal form (digits with an optional '-' and
+ // '.', no scientific notation) and both its scale and precision fit into
+ // `MAX_DECIMAL16_PRECISION`. Nothing is appended when false is returned.
+ private boolean tryParseDecimal(String input) {
+     for (char ch : input.toCharArray()) {
+         boolean plainDecimalChar = ch == '-' || ch == '.' || (ch >= '0' && ch <= '9');
+         if (!plainDecimalChar) {
+             return false;
+         }
+     }
+     BigDecimal d = new BigDecimal(input);
+     boolean fits =
+             d.scale() <= MAX_DECIMAL16_PRECISION && d.precision() <= MAX_DECIMAL16_PRECISION;
+     if (!fits) {
+         return false;
+     }
+     appendDecimal(d);
+     return true;
+ }
+
+ // The write buffer used while building the variant value. Its first `writePos` bytes have
+ // been written.
+ private byte[] writeBuffer = new byte[128];
+ private int writePos = 0;
+ // Map keys to a monotonically increasing id.
+ private final HashMap<String, Integer> dictionary = new HashMap<>();
+ // Store all keys in `dictionary` (UTF-8 encoded) in the order of id.
+ private final ArrayList<byte[]> dictionaryKeys = new ArrayList<>();
+ // Whether repeated object keys are tolerated (last one wins) or rejected.
+ private final boolean allowDuplicateKeys;
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/InstantiationUtil.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/InstantiationUtil.java
new file mode 100644
index 00000000000..09e2285c9a4
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/InstantiationUtil.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.utils;
+
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.core.memory.DataInputViewStreamWrapper;
+import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.ObjectStreamClass;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Modifier;
+import java.lang.reflect.Proxy;
+import java.util.HashMap;
+import java.util.zip.DeflaterOutputStream;
+
+/** Utility class to create instances from class objects. */
+public class InstantiationUtil {
+ private InstantiationUtil() {
+ // no instantiation
+ }
+
+ /**
+  * A custom ObjectInputStream that can load classes using a specific ClassLoader.
+  *
+  * <p>When the given class loader is {@code null}, class and proxy resolution fall back to the
+  * default {@link ObjectInputStream} behavior.
+  */
+ public static class ClassLoaderObjectInputStream extends ObjectInputStream {
+
+     protected final ClassLoader classLoader;
+
+     public ClassLoaderObjectInputStream(InputStream in, ClassLoader classLoader)
+             throws IOException {
+         super(in);
+         this.classLoader = classLoader;
+     }
+
+     @Override
+     protected Class<?> resolveClass(ObjectStreamClass desc)
+             throws IOException, ClassNotFoundException {
+         if (classLoader != null) {
+             String name = desc.getName();
+             try {
+                 return Class.forName(name, false, classLoader);
+             } catch (ClassNotFoundException ex) {
+                 // Class.forName cannot resolve primitive type names such as "int", so look
+                 // those up in the static table before giving up.
+                 Class<?> cl = primitiveClasses.get(name);
+                 if (cl != null) {
+                     // return primitive class
+                     return cl;
+                 } else {
+                     // throw ClassNotFoundException
+                     throw ex;
+                 }
+             }
+         }
+
+         return super.resolveClass(desc);
+     }
+
+     @Override
+     protected Class<?> resolveProxyClass(String[] interfaces)
+             throws IOException, ClassNotFoundException {
+         if (classLoader != null) {
+             ClassLoader nonPublicLoader = null;
+             boolean hasNonPublicInterface = false;
+
+             // define proxy in class loader of non-public interface(s), if any
+             Class<?>[] classObjs = new Class<?>[interfaces.length];
+             for (int i = 0; i < interfaces.length; i++) {
+                 Class<?> cl = Class.forName(interfaces[i], false, classLoader);
+                 if ((cl.getModifiers() & Modifier.PUBLIC) == 0) {
+                     if (hasNonPublicInterface) {
+                         // All non-public interfaces must come from the same class loader.
+                         if (nonPublicLoader != cl.getClassLoader()) {
+                             throw new IllegalAccessError(
+                                     "conflicting non-public interface class loaders");
+                         }
+                     } else {
+                         nonPublicLoader = cl.getClassLoader();
+                         hasNonPublicInterface = true;
+                     }
+                 }
+                 classObjs[i] = cl;
+             }
+             try {
+                 return Proxy.getProxyClass(
+                         hasNonPublicInterface ? nonPublicLoader : classLoader, classObjs);
+             } catch (IllegalArgumentException e) {
+                 throw new ClassNotFoundException(null, e);
+             }
+         }
+
+         return super.resolveProxyClass(interfaces);
+     }
+
+     // ------------------------------------------------
+
+     // Primitive type names mapped to their class objects, used as a fallback in resolveClass.
+     private static final HashMap<String, Class<?>> primitiveClasses = new HashMap<>(9);
+
+     static {
+         primitiveClasses.put("boolean", boolean.class);
+         primitiveClasses.put("byte", byte.class);
+         primitiveClasses.put("char", char.class);
+         primitiveClasses.put("short", short.class);
+         primitiveClasses.put("int", int.class);
+         primitiveClasses.put("long", long.class);
+         primitiveClasses.put("float", float.class);
+         primitiveClasses.put("double", double.class);
+         primitiveClasses.put("void", void.class);
+     }
+ }
+
+ /**
+  * Serializes the given object with Java serialization.
+  *
+  * @param o the object to serialize
+  * @return the serialized bytes
+  * @throws IOException if writing the object fails
+  */
+ public static byte[] serializeObject(Object o) throws IOException {
+     try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+             ObjectOutputStream objectOut = new ObjectOutputStream(buffer)) {
+         objectOut.writeObject(o);
+         // Make sure everything has reached `buffer` before snapshotting it.
+         objectOut.flush();
+         return buffer.toByteArray();
+     }
+ }
+
+ /**
+  * Writes the given object to {@code out} with Java serialization. If {@code out} already is an
+  * {@link ObjectOutputStream} it is used directly; otherwise it is wrapped in a new one. The
+  * stream is neither flushed nor closed here.
+  */
+ public static void serializeObject(OutputStream out, Object o) throws IOException {
+     final ObjectOutputStream objectOut;
+     if (out instanceof ObjectOutputStream) {
+         objectOut = (ObjectOutputStream) out;
+     } else {
+         objectOut = new ObjectOutputStream(out);
+     }
+     objectOut.writeObject(o);
+ }
+
+ /**
+ * Serializes the given object with Java serialization, passing the output through a
+ * {@link DeflaterOutputStream}.
+ *
+ * @param o the object to serialize
+ * @return the compressed serialized bytes
+ * @throws IOException if writing the object fails
+ */
+ public static byte[] serializeObjectAndCompress(Object o) throws IOException {
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DeflaterOutputStream dos = new DeflaterOutputStream(baos);
+ ObjectOutputStream oos = new ObjectOutputStream(dos)) {
+ oos.writeObject(o);
+ oos.flush();
+ // Close the deflater stream explicitly before reading the buffer, so that the remaining
+ // compressed data is written out to `baos` first.
+ dos.close();
+ return baos.toByteArray();
+ }
+ }
+
+ /**
+  * Checks whether the given object can be written with Java serialization by actually
+  * serializing it.
+  *
+  * @return {@code true} if serialization succeeds, {@code false} if it fails with an
+  *     {@link IOException}
+  */
+ public static boolean isSerializable(Object o) {
+     try {
+         serializeObject(o);
+         return true;
+     } catch (IOException e) {
+         return false;
+     }
+ }
+
+ /**
+  * Serializes a record to a byte array using the given serializer.
+  *
+  * @param serializer the serializer that writes the record
+  * @param record the record to serialize; must not be null
+  * @param <T> type of the record
+  * @return the serialized bytes
+  * @throws IOException if the serializer fails to write the record
+  * @throws NullPointerException if {@code record} is null
+  */
+ public static <T> byte[] serializeToByteArray(TypeSerializer<T> serializer, T record)
+         throws IOException {
+     if (record == null) {
+         throw new NullPointerException("Record to serialize to byte array must not be null.");
+     }
+
+     ByteArrayOutputStream bos = new ByteArrayOutputStream(64);
+     DataOutputViewStreamWrapper outputViewWrapper = new DataOutputViewStreamWrapper(bos);
+     serializer.serialize(record, outputViewWrapper);
+     return bos.toByteArray();
+ }
+
+ /**
+  * Deserializes a record from a byte array using the given serializer.
+  *
+  * @param serializer the serializer that reads the record
+  * @param buf the bytes to read from; must not be null
+  * @param <T> type of the record
+  * @return the deserialized record
+  * @throws IOException if the serializer fails to read the record
+  * @throws NullPointerException if {@code buf} is null
+  */
+ public static <T> T deserializeFromByteArray(TypeSerializer<T> serializer, byte[] buf)
+         throws IOException {
+     if (buf == null) {
+         throw new NullPointerException("Byte array to deserialize from must not be null.");
+     }
+
+     DataInputViewStreamWrapper inputViewWrapper =
+             new DataInputViewStreamWrapper(new ByteArrayInputStream(buf));
+     return serializer.deserialize(inputViewWrapper);
+ }
+
+ /**
+  * Deserializes a record from a byte array using the given serializer, handing it an existing
+  * instance to reuse.
+  *
+  * @param serializer the serializer that reads the record
+  * @param reuse the instance passed to the serializer for reuse
+  * @param buf the bytes to read from; must not be null
+  * @param <T> type of the record
+  * @return the record returned by the serializer
+  * @throws IOException if the serializer fails to read the record
+  * @throws NullPointerException if {@code buf} is null
+  */
+ public static <T> T deserializeFromByteArray(TypeSerializer<T> serializer, T reuse, byte[] buf)
+         throws IOException {
+     if (buf == null) {
+         throw new NullPointerException("Byte array to deserialize from must not be null.");
+     }
+
+     DataInputViewStreamWrapper inputViewWrapper =
+             new DataInputViewStreamWrapper(new ByteArrayInputStream(buf));
+     return serializer.deserialize(reuse, inputViewWrapper);
+ }
+
+ /**
+  * Deserializes an object from the given bytes, resolving classes through the given class
+  * loader.
+  *
+  * @param bytes the serialized object
+  * @param cl the class loader used to resolve classes
+  * @param <T> expected type of the object; the cast is unchecked
+  */
+ @SuppressWarnings("unchecked")
+ public static <T> T deserializeObject(byte[] bytes, ClassLoader cl)
+         throws IOException, ClassNotFoundException {
+     return deserializeObject(new ByteArrayInputStream(bytes), cl);
+ }
+
+ /**
+  * Reads one object from the given stream, resolving classes through the given class loader.
+  * While reading, the thread's context class loader is set to {@code cl}; it is restored
+  * afterwards. The given stream is not closed.
+  *
+  * @param in the stream to read from
+  * @param cl the class loader used to resolve classes
+  * @param <T> expected type of the object; the cast is unchecked
+  */
+ @SuppressWarnings("unchecked")
+ public static <T> T deserializeObject(InputStream in, ClassLoader cl)
+         throws IOException, ClassNotFoundException {
+
+     final ClassLoader old = Thread.currentThread().getContextClassLoader();
+     // not using resource try to avoid AutoClosable's close() on the given stream
+     try {
+         ObjectInputStream oois = new ClassLoaderObjectInputStream(in, cl);
+         Thread.currentThread().setContextClassLoader(cl);
+         return (T) oois.readObject();
+     } finally {
+         Thread.currentThread().setContextClassLoader(old);
+     }
+ }
+
+ /**
+  * Clones the given serializable object using Java serialization. Classes are resolved through
+  * the class loader of the object's own class.
+  *
+  * @param obj Object to clone
+  * @param <T> Type of the object to clone
+  * @return The cloned object, or {@code null} if {@code obj} is {@code null}
+  * @throws IOException Thrown if the serialization or deserialization process fails.
+  * @throws ClassNotFoundException Thrown if any of the classes referenced by the object cannot
+  *     be resolved during deserialization.
+  */
+ public static <T extends Serializable> T clone(T obj)
+         throws IOException, ClassNotFoundException {
+     if (obj == null) {
+         return null;
+     } else {
+         return clone(obj, obj.getClass().getClassLoader());
+     }
+ }
+
+ /**
+  * Clones the given serializable object using Java serialization, using the given classloader to
+  * resolve the cloned classes.
+  *
+  * @param obj Object to clone
+  * @param classLoader The classloader to resolve the classes during deserialization.
+  * @param <T> Type of the object to clone
+  * @return Cloned object, or {@code null} if {@code obj} is {@code null}
+  * @throws IOException Thrown if the serialization or deserialization process fails.
+  * @throws ClassNotFoundException Thrown if any of the classes referenced by the object cannot
+  *     be resolved during deserialization.
+  */
+ public static <T extends Serializable> T clone(T obj, ClassLoader classLoader)
+         throws IOException, ClassNotFoundException {
+     if (obj == null) {
+         return null;
+     } else {
+         // Round-trip through a byte array: serialize, then read back with `classLoader`.
+         final byte[] serializedObject = serializeObject(obj);
+         return deserializeObject(serializedObject, classLoader);
+     }
+ }
+}
diff --git a/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java
new file mode 100644
index 00000000000..665aef5f509
--- /dev/null
+++ b/flink-cdc-common-2.x/src/main/java/org/apache/flink/cdc/common/utils/SchemaMergingUtils.java
@@ -0,0 +1,1058 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.utils;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.annotation.VisibleForTesting;
+import org.apache.flink.cdc.common.data.DateData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.LocalZonedTimestampData;
+import org.apache.flink.cdc.common.data.StringData;
+import org.apache.flink.cdc.common.data.TimeData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.data.ZonedTimestampData;
+import org.apache.flink.cdc.common.data.binary.BinaryStringData;
+import org.apache.flink.cdc.common.event.AddColumnEvent;
+import org.apache.flink.cdc.common.event.AlterColumnTypeEvent;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.event.DropColumnEvent;
+import org.apache.flink.cdc.common.event.SchemaChangeEvent;
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.schema.Column;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.common.types.ArrayType;
+import org.apache.flink.cdc.common.types.BigIntType;
+import org.apache.flink.cdc.common.types.BinaryType;
+import org.apache.flink.cdc.common.types.BooleanType;
+import org.apache.flink.cdc.common.types.CharType;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.DataTypeFamily;
+import org.apache.flink.cdc.common.types.DataTypeRoot;
+import org.apache.flink.cdc.common.types.DataTypes;
+import org.apache.flink.cdc.common.types.DateType;
+import org.apache.flink.cdc.common.types.DecimalType;
+import org.apache.flink.cdc.common.types.DoubleType;
+import org.apache.flink.cdc.common.types.FloatType;
+import org.apache.flink.cdc.common.types.IntType;
+import org.apache.flink.cdc.common.types.LocalZonedTimestampType;
+import org.apache.flink.cdc.common.types.MapType;
+import org.apache.flink.cdc.common.types.RowType;
+import org.apache.flink.cdc.common.types.SmallIntType;
+import org.apache.flink.cdc.common.types.TimeType;
+import org.apache.flink.cdc.common.types.TimestampType;
+import org.apache.flink.cdc.common.types.TinyIntType;
+import org.apache.flink.cdc.common.types.VarBinaryType;
+import org.apache.flink.cdc.common.types.VarCharType;
+import org.apache.flink.cdc.common.types.VariantType;
+import org.apache.flink.cdc.common.types.ZonedTimestampType;
+import org.apache.flink.cdc.common.types.variant.Variant;
+
+import org.apache.flink.shaded.guava33.com.google.common.collect.ArrayListMultimap;
+import org.apache.flink.shaded.guava33.com.google.common.collect.ImmutableList;
+import org.apache.flink.shaded.guava33.com.google.common.collect.Streams;
+import org.apache.flink.shaded.guava33.com.google.common.io.BaseEncoding;
+
+import javax.annotation.Nullable;
+
+ import java.math.BigDecimal;
+ import java.nio.charset.StandardCharsets;
+ import java.time.LocalDate;
+ import java.time.LocalDateTime;
+ import java.time.LocalTime;
+ import java.time.ZoneId;
+ import java.time.ZoneOffset;
+ import java.time.ZonedDateTime;
+ import java.util.ArrayList;
+ import java.util.Collections;
+ import java.util.HashMap;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.Objects;
+ import java.util.stream.Collectors;
+
+/**
+ * Utils for merging {@link Schema}s and {@link DataType}s. Prefer using this over {@link
+ * SchemaUtils} to get consistent schema merging behaviors.
+ */
+@PublicEvolving
+public class SchemaMergingUtils {
+ /**
+  * Checks whether the given {@code upcomingSchema} fits into the currently known {@code
+  * currentSchema}. The current schema may be {@code null} (the cold-start state), in which case
+  * this method always returns {@code false}; the upcoming schema must never be {@code null}.
+  *
+  * <p>This method only checks columns' type compatibility, but ignores metadata fields like
+  * primaryKeys, partitionKeys, options.
+  *
+  * @param currentSchema schema known so far, or {@code null} if none is known yet
+  * @param upcomingSchema schema of the incoming data
+  * @return {@code true} if every upcoming column is type-compatible with the current column of
+  *     the same name; {@code false} otherwise, including when an upcoming column is missing
+  *     from the current schema
+  */
+ public static boolean isSchemaCompatible(
+         @Nullable Schema currentSchema, Schema upcomingSchema) {
+     if (currentSchema == null) {
+         return false;
+     }
+
+     Map<String, DataType> currentColumnTypes =
+             currentSchema.getColumns().stream()
+                     .collect(Collectors.toMap(Column::getName, Column::getType));
+
+     for (Column upcomingColumn : upcomingSchema.getColumns()) {
+         // A column absent from the current schema yields null here, which
+         // isDataTypeCompatible treats as incompatible.
+         DataType currentColumnType = currentColumnTypes.get(upcomingColumn.getName());
+         if (!isDataTypeCompatible(currentColumnType, upcomingColumn.getType())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Tries to merge {@code upcomingSchema} into {@code currentSchema} by performing lenient
+  * schema changes. Returns a wider schema that could hold both of them.
+  *
+  * <p>Columns that exist in both schemas keep their position in the current schema and are
+  * widened to the least common type when necessary; columns that exist only in the upcoming
+  * schema are appended at the end in their upcoming order.
+  *
+  * @param currentSchema schema known so far, or {@code null} if none is known yet
+  * @param upcomingSchema schema of the incoming data
+  * @return {@code upcomingSchema} if there is no current schema, {@code currentSchema} itself
+  *     if it is already compatible, otherwise a copy of the current schema with the merged
+  *     column list
+  */
+ public static Schema getLeastCommonSchema(
+         @Nullable Schema currentSchema, Schema upcomingSchema) {
+     // No current schema record, we need to create it first.
+     if (currentSchema == null) {
+         return upcomingSchema;
+     }
+
+     // Current schema is compatible with upcoming ones, just return it and perform no schema
+     // evolution.
+     if (isSchemaCompatible(currentSchema, upcomingSchema)) {
+         return currentSchema;
+     }
+
+     Map<String, Column> currentColumns =
+             currentSchema.getColumns().stream()
+                     .collect(Collectors.toMap(Column::getName, col -> col));
+
+     // Column name -> widened type, only for columns whose type actually has to change.
+     Map<String, DataType> widenedTypes = new HashMap<>();
+     List<Column> appendedColumns = new ArrayList<>();
+
+     for (Column upcomingColumn : upcomingSchema.getColumns()) {
+         String columnName = upcomingColumn.getName();
+         Column currentColumn = currentColumns.get(columnName);
+         if (currentColumn == null) {
+             appendedColumns.add(upcomingColumn);
+             continue;
+         }
+
+         DataType currentColumnType = currentColumn.getType();
+         DataType leastCommonType =
+                 getLeastCommonType(currentColumnType, upcomingColumn.getType());
+         if (!Objects.equals(leastCommonType, currentColumnType)) {
+             widenedTypes.put(columnName, leastCommonType);
+         }
+     }
+
+     List<Column> commonColumns = new ArrayList<>();
+     for (Column column : currentSchema.getColumns()) {
+         DataType widenedType = widenedTypes.get(column.getName());
+         commonColumns.add(widenedType == null ? column : column.copy(widenedType));
+     }
+     commonColumns.addAll(appendedColumns);
+
+     return currentSchema.copy(commonColumns);
+ }
+
+ /**
+  * Merges compatible schemas by folding them, in list order, through {@link
+  * #getLeastCommonSchema(Schema, Schema)}.
+  *
+  * @param schemas schemas to merge
+  * @return {@code null} for an empty list, the single element for a one-element list, otherwise
+  *     the accumulated least common schema
+  */
+ public static Schema getCommonSchema(List<Schema> schemas) {
+     if (schemas.isEmpty()) {
+         return null;
+     }
+     if (schemas.size() == 1) {
+         return schemas.get(0);
+     }
+
+     // Starting from null makes the first iteration adopt the first schema as-is.
+     Schema mergedSchema = null;
+     for (Schema schema : schemas) {
+         mergedSchema = getLeastCommonSchema(mergedSchema, schema);
+     }
+     return mergedSchema;
+ }
+
+ /**
+  * Generates the schema change events needed to convert a compatible {@code beforeSchema} into
+  * {@code afterSchema}.
+  *
+  * <p>Events are emitted in a fixed order: added columns, then altered column types, then
+  * dropped columns. Each kind is emitted only if it is non-empty.
+  *
+  * @param tableId table the events apply to
+  * @param beforeSchema schema before the change, or {@code null} if the table is not known yet
+  * @param afterSchema schema after the change
+  * @return a single {@link CreateTableEvent} when {@code beforeSchema} is {@code null},
+  *     otherwise the (possibly empty) list of change events
+  */
+ public static List<SchemaChangeEvent> getSchemaDifference(
+         TableId tableId, @Nullable Schema beforeSchema, Schema afterSchema) {
+     if (beforeSchema == null) {
+         return Collections.singletonList(new CreateTableEvent(tableId, afterSchema));
+     }
+
+     // Columns are removed from this map as they are matched; whatever remains afterwards
+     // exists only in the before-schema and therefore has to be dropped.
+     Map<String, Column> unmatchedBeforeColumns =
+             beforeSchema.getColumns().stream()
+                     .collect(Collectors.toMap(Column::getName, col -> col));
+
+     Map<String, DataType> oldTypeMapping = new HashMap<>();
+     Map<String, DataType> newTypeMapping = new HashMap<>();
+     List<AddColumnEvent.ColumnWithPosition> appendedColumns = new ArrayList<>();
+
+     // Name of the column preceding the one being visited; null while visiting the first one.
+     String previousColumnName = null;
+     for (Column afterColumn : afterSchema.getColumns()) {
+         String columnName = afterColumn.getName();
+         Column beforeColumn = unmatchedBeforeColumns.remove(columnName);
+
+         if (beforeColumn != null) {
+             DataType beforeType = beforeColumn.getType();
+             DataType afterType = afterColumn.getType();
+             if (!Objects.equals(beforeType, afterType)) {
+                 oldTypeMapping.put(columnName, beforeType);
+                 newTypeMapping.put(columnName, afterType);
+             }
+         } else if (previousColumnName == null) {
+             appendedColumns.add(
+                     new AddColumnEvent.ColumnWithPosition(
+                             afterColumn, AddColumnEvent.ColumnPosition.FIRST, null));
+         } else {
+             appendedColumns.add(
+                     new AddColumnEvent.ColumnWithPosition(
+                             afterColumn,
+                             AddColumnEvent.ColumnPosition.AFTER,
+                             previousColumnName));
+         }
+         previousColumnName = columnName;
+     }
+
+     List<SchemaChangeEvent> schemaChangeEvents = new ArrayList<>();
+     if (!appendedColumns.isEmpty()) {
+         schemaChangeEvents.add(new AddColumnEvent(tableId, appendedColumns));
+     }
+     if (!newTypeMapping.isEmpty()) {
+         schemaChangeEvents.add(
+                 new AlterColumnTypeEvent(tableId, newTypeMapping, oldTypeMapping));
+     }
+     if (!unmatchedBeforeColumns.isEmpty()) {
+         schemaChangeEvents.add(
+                 new DropColumnEvent(
+                         tableId, new ArrayList<>(unmatchedBeforeColumns.keySet())));
+     }
+     return schemaChangeEvents;
+ }
+
+ /**
+  * Coerces {@code upcomingRow}, laid out according to {@code upcomingSchema}, into the layout of
+  * {@code currentSchema}, with tolerance mode enabled. Invoking this method implicitly assumes
+  * that {@code isSchemaCompatible(currentSchema, upcomingSchema)} returns true. Otherwise, some
+  * upstream records might be lost.
+  *
+  * @param timezone zone ID string used when converting between timestamp types
+  * @param currentSchema schema the returned row conforms to
+  * @param upcomingSchema schema describing {@code upcomingRow}
+  * @param upcomingRow field values in {@code upcomingSchema} column order
+  * @return the coerced row, one entry per column of {@code currentSchema}
+  */
+ public static Object[] coerceRow(
+         String timezone,
+         Schema currentSchema,
+         Schema upcomingSchema,
+         List<Object> upcomingRow) {
+     return coerceRow(timezone, currentSchema, upcomingSchema, upcomingRow, true);
+ }
+
+ /**
+  * Coerces {@code upcomingRow}, laid out according to {@code upcomingSchema}, into the layout of
+  * {@code currentSchema}. Invoking this method implicitly assumes that {@code
+  * isSchemaCompatible(currentSchema, upcomingSchema)} returns true. Otherwise, some upstream
+  * records might be lost.
+  *
+  * <p>Columns of the current schema that are missing from the upcoming schema are filled with
+  * {@code null}.
+  *
+  * @param timezone zone ID string used when converting between timestamp types
+  * @param currentSchema schema the returned row conforms to
+  * @param upcomingSchema schema describing {@code upcomingRow}
+  * @param upcomingRow field values in {@code upcomingSchema} column order
+  * @param toleranceMode if {@code true}, a field whose coercion fails with an {@link
+  *     IllegalArgumentException} is left as {@code null}; if {@code false}, the exception is
+  *     rethrown
+  * @return the coerced row, one entry per column of {@code currentSchema}
+  */
+ public static Object[] coerceRow(
+         String timezone,
+         Schema currentSchema,
+         Schema upcomingSchema,
+         List<Object> upcomingRow,
+         boolean toleranceMode) {
+     List<Column> currentColumns = currentSchema.getColumns();
+     Map<String, DataType> upcomingColumnTypes =
+             upcomingSchema.getColumns().stream()
+                     .collect(Collectors.toMap(Column::getName, Column::getType));
+     // Null fields are filtered out before collecting (Collectors.toMap rejects null values);
+     // looking them up later simply yields null again.
+     Map<String, Object> upcomingColumnObjects =
+             Streams.zip(
+                             upcomingSchema.getColumnNames().stream(),
+                             upcomingRow.stream(),
+                             Tuple2::of)
+                     .filter(t -> t.f1 != null)
+                     .collect(Collectors.toMap(t -> t.f0, t -> t.f1));
+
+     // Entries start out as null, so columns that are skipped below stay null.
+     Object[] coercedRow = new Object[currentSchema.getColumnCount()];
+
+     for (int i = 0; i < currentSchema.getColumnCount(); i++) {
+         Column currentColumn = currentColumns.get(i);
+         String columnName = currentColumn.getName();
+
+         DataType upcomingType = upcomingColumnTypes.get(columnName);
+         if (upcomingType == null) {
+             // Column does not exist upstream.
+             continue;
+         }
+
+         DataType currentType = currentColumn.getType();
+         Object upcomingValue = upcomingColumnObjects.get(columnName);
+
+         if (Objects.equals(upcomingType, currentType)) {
+             coercedRow[i] = upcomingValue;
+             continue;
+         }
+
+         try {
+             coercedRow[i] = coerceObject(timezone, upcomingValue, upcomingType, currentType);
+         } catch (IllegalArgumentException e) {
+             // In tolerance mode an uncoercible field is deliberately dropped (left null).
+             if (!toleranceMode) {
+                 throw e;
+             }
+         }
+     }
+     return coercedRow;
+ }
+
+ /**
+  * Tries to merge the given {@link Schema}s while ensuring they are essentially identical. The
+  * only differences allowed are nullability, string and varchar precision, default value, and
+  * comments.
+  *
+  * <p>All schemas must expose the same column names in the same order, and at most one distinct
+  * non-empty primary key list, partition key list and option map may be present among them.
+  * The merged columns are rebuilt from name and merged type only.
+  *
+  * @param schemas schemas to merge; must not be empty
+  * @return the single schema if only one is given, otherwise the merged schema
+  * @throws IllegalArgumentException if the list is empty or the schemas conflict
+  */
+ public static Schema strictlyMergeSchemas(List<Schema> schemas) {
+     Preconditions.checkArgument(
+             !schemas.isEmpty(),
+             "Trying to merge transformed schemas %s, but got empty list",
+             schemas);
+     if (schemas.size() == 1) {
+         return schemas.get(0);
+     }
+
+     // Collect the distinct, non-empty variants of each metadata field.
+     List<List<String>> primaryKeys =
+             schemas.stream()
+                     .map(Schema::primaryKeys)
+                     .filter(p -> !p.isEmpty())
+                     .distinct()
+                     .collect(Collectors.toList());
+     List<List<String>> partitionKeys =
+             schemas.stream()
+                     .map(Schema::partitionKeys)
+                     .filter(p -> !p.isEmpty())
+                     .distinct()
+                     .collect(Collectors.toList());
+     List<Map<String, String>> options =
+             schemas.stream()
+                     .map(Schema::options)
+                     .filter(p -> !p.isEmpty())
+                     .distinct()
+                     .collect(Collectors.toList());
+     List<List<String>> columnNames =
+             schemas.stream()
+                     .map(Schema::getColumnNames)
+                     .distinct()
+                     .collect(Collectors.toList());
+
+     Preconditions.checkArgument(
+             primaryKeys.size() <= 1,
+             "Trying to merge transformed schemas %s, but got more than one primary key configurations: %s",
+             schemas,
+             primaryKeys);
+     Preconditions.checkArgument(
+             partitionKeys.size() <= 1,
+             "Trying to merge transformed schemas %s, but got more than one partition key configurations: %s",
+             schemas,
+             partitionKeys);
+     Preconditions.checkArgument(
+             options.size() <= 1,
+             "Trying to merge transformed schemas %s, but got more than one option configurations: %s",
+             schemas,
+             options);
+     Preconditions.checkArgument(
+             columnNames.size() == 1,
+             "Trying to merge transformed schemas %s, but got more than one column name views: %s",
+             schemas,
+             columnNames);
+
+     List<String> mergedColumnNames = columnNames.get(0);
+     int arity = mergedColumnNames.size();
+
+     // Column index -> the type of that column in every schema.
+     ArrayListMultimap<Integer, DataType> toBeMergedColumnTypes =
+             ArrayListMultimap.create(arity, 1);
+     for (Schema schema : schemas) {
+         List<DataType> columnTypes = schema.getColumnDataTypes();
+         for (int colIndex = 0; colIndex < columnTypes.size(); colIndex++) {
+             toBeMergedColumnTypes.put(colIndex, columnTypes.get(colIndex));
+         }
+     }
+
+     List<Column> mergedColumns = new ArrayList<>(arity);
+     for (int i = 0; i < arity; i++) {
+         DataType mergedType = strictlyMergeDataTypes(toBeMergedColumnTypes.get(i));
+         mergedColumns.add(Column.physicalColumn(mergedColumnNames.get(i), mergedType));
+     }
+
+     return Schema.newBuilder()
+             .primaryKey(primaryKeys.isEmpty() ? Collections.emptyList() : primaryKeys.get(0))
+             .partitionKey(
+                     partitionKeys.isEmpty() ? Collections.emptyList() : partitionKeys.get(0))
+             .options(options.isEmpty() ? Collections.emptyMap() : options.get(0))
+             .setColumns(mergedColumns)
+             .build();
+ }
+
+ /**
+  * Merges data types that must share the same type root.
+  *
+  * <p>If all types are equal, that type is returned. Otherwise the types must have one common
+  * root, and the result is that root's widest variant (maximum length or precision). Roots
+  * other than the character, binary and timestamp ones handled below cannot be merged.
+  *
+  * @param dataTypes types of one column across all schemas; must not be empty
+  * @return the merged type
+  * @throws IllegalArgumentException if the list is empty, the roots differ, or the common root
+  *     has no widest variant defined here
+  */
+ private static DataType strictlyMergeDataTypes(List<DataType> dataTypes) {
+     Preconditions.checkArgument(
+             !dataTypes.isEmpty(),
+             "Trying to merge transformed data types %s, but got empty list",
+             dataTypes);
+
+     List<DataType> simpleMergeTypes =
+             dataTypes.stream().distinct().collect(Collectors.toList());
+     if (simpleMergeTypes.size() == 1) {
+         return simpleMergeTypes.get(0);
+     }
+
+     List<DataTypeRoot> typeRoots =
+             dataTypes.stream()
+                     .map(DataType::getTypeRoot)
+                     .distinct()
+                     .collect(Collectors.toList());
+     Preconditions.checkArgument(
+             typeRoots.size() == 1,
+             "Trying to merge types %s, but got more than one type root: %s",
+             dataTypes,
+             typeRoots);
+
+     // Decay types to the widest variant of their shared root. All types have the same root
+     // at this point, so inspecting the first one is sufficient.
+     DataType type = dataTypes.get(0);
+
+     if (type.is(DataTypeRoot.CHAR)) {
+         return DataTypes.CHAR(CharType.MAX_LENGTH);
+     } else if (type.is(DataTypeRoot.VARCHAR)) {
+         return DataTypes.STRING();
+     } else if (type.is(DataTypeRoot.BINARY)) {
+         return DataTypes.BINARY(BinaryType.MAX_LENGTH);
+     } else if (type.is(DataTypeRoot.VARBINARY)) {
+         return DataTypes.VARBINARY(VarBinaryType.MAX_LENGTH);
+     } else if (type.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE)) {
+         return DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION);
+     } else if (type.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
+         return DataTypes.TIMESTAMP_TZ(ZonedTimestampType.MAX_PRECISION);
+     } else if (type.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)) {
+         return DataTypes.TIMESTAMP_LTZ(LocalZonedTimestampType.MAX_PRECISION);
+     } else {
+         throw new IllegalArgumentException(
+                 "Unable to merge data types with different precision: " + dataTypes);
+     }
+ }
+
+ /**
+  * Checks whether a value of {@code upcomingType} can be stored in a column of {@code
+  * currentType}.
+  *
+  * @param currentType type of the existing column, or {@code null} if the column does not exist
+  * @param upcomingType type of the incoming column
+  * @return {@code true} if both types are equal, or if {@code currentType} is listed among the
+  *     merge targets registered for {@code upcomingType}'s class; {@code false} otherwise,
+  *     including when no merge targets are registered for that class
+  */
+ @VisibleForTesting
+ static boolean isDataTypeCompatible(@Nullable DataType currentType, DataType upcomingType) {
+     // If two types are identical, they're compatible of course.
+     if (Objects.equals(currentType, upcomingType)) {
+         return true;
+     }
+
+     // Or, if an upcoming column does not exist in current schema, it can't be compatible.
+     if (currentType == null) {
+         return false;
+     }
+
+     // Or, check if currentType is one of the types upcomingType may be widened to. A type
+     // class without an entry in the merging tree has no known widening target.
+     List<DataType> mergeTargets = TYPE_MERGING_TREE.get(upcomingType.getClass());
+     return mergeTargets != null && mergeTargets.contains(currentType);
+ }
+
+ /**
+  * Finds the narrowest type that can hold values of both given types.
+  *
+  * <p>Nullability is ignored while merging and restored afterwards: the result is nullable if
+  * either input is. Timestamp-family pairs and any pair involving a DECIMAL use dedicated merge
+  * logic; everything else is resolved through the type merging tree, falling back to STRING.
+  *
+  * @param currentType type of the existing column
+  * @param targetType type of the incoming column
+  * @return the least common type of both inputs
+  */
+ @VisibleForTesting
+ static DataType getLeastCommonType(DataType currentType, DataType targetType) {
+     // Ignore nullability during data type merge, and restore it later
+     boolean nullable = currentType.isNullable() || targetType.isNullable();
+     currentType = currentType.notNull();
+     targetType = targetType.notNull();
+
+     if (Objects.equals(currentType, targetType)) {
+         return currentType.copy(nullable);
+     }
+
+     // For TIMESTAMP and EXACT_NUMERIC types, we have fine-grained type merging logic.
+     if (currentType.is(DataTypeFamily.TIMESTAMP) && targetType.is(DataTypeFamily.TIMESTAMP)) {
+         return mergeTimestampType(currentType, targetType).copy(nullable);
+     }
+
+     if (currentType instanceof DecimalType || targetType instanceof DecimalType) {
+         return mergeDecimalType(currentType, targetType).copy(nullable);
+     }
+
+     // A type class without an entry in the merging tree is treated as having no widening
+     // candidates, so it ends up at the STRING fallback below instead of failing.
+     List<DataType> currentTypeTree =
+             TYPE_MERGING_TREE.getOrDefault(currentType.getClass(), Collections.emptyList());
+     List<DataType> targetTypeTree =
+             TYPE_MERGING_TREE.getOrDefault(targetType.getClass(), Collections.emptyList());
+
+     // Candidates are visited in registration order, so the first shared one wins.
+     for (DataType type : currentTypeTree) {
+         if (targetTypeTree.contains(type)) {
+             return type.copy(nullable);
+         }
+     }
+
+     // The most universal type and our final resort: STRING.
+     return DataTypes.STRING().copy(nullable);
+ }
+
+ /**
+  * Merges two timestamp types into the wider flavour with the larger precision.
+  *
+  * <p>Flavours are ordered TIMESTAMP (0) -> TIMESTAMP_LTZ (1) -> TIMESTAMP_TZ (2); the result
+  * uses the higher of the two levels and the maximum of the two precisions.
+  *
+  * @param lType first timestamp type
+  * @param rType second timestamp type
+  * @return the merged timestamp type
+  * @throws IllegalArgumentException if either argument is not one of the three timestamp types
+  */
+ @VisibleForTesting
+ static DataType mergeTimestampType(DataType lType, DataType rType) {
+     // The left operand is classified first so an invalid left type is reported first.
+     int leftTypeLevel = timestampTypeLevel(lType);
+     int leftPrecision = timestampPrecision(lType);
+     int rightTypeLevel = timestampTypeLevel(rType);
+     int rightPrecision = timestampPrecision(rType);
+
+     int precision = Math.max(leftPrecision, rightPrecision);
+
+     switch (Math.max(leftTypeLevel, rightTypeLevel)) {
+         case 0:
+             return DataTypes.TIMESTAMP(precision);
+         case 1:
+             return DataTypes.TIMESTAMP_LTZ(precision);
+         case 2:
+             return DataTypes.TIMESTAMP_TZ(precision);
+         default:
+             throw new IllegalArgumentException("Unreachable");
+     }
+ }
+
+ /** Ranks a timestamp type: TIMESTAMP is 0, TIMESTAMP_LTZ is 1, TIMESTAMP_TZ is 2. */
+ private static int timestampTypeLevel(DataType type) {
+     if (type instanceof TimestampType) {
+         return 0;
+     } else if (type instanceof LocalZonedTimestampType) {
+         return 1;
+     } else if (type instanceof ZonedTimestampType) {
+         return 2;
+     } else {
+         throw new IllegalArgumentException("Unknown TIMESTAMP type: " + type);
+     }
+ }
+
+ /** Reads the precision of any of the three timestamp types. */
+ private static int timestampPrecision(DataType type) {
+     if (type instanceof TimestampType) {
+         return ((TimestampType) type).getPrecision();
+     } else if (type instanceof LocalZonedTimestampType) {
+         return ((LocalZonedTimestampType) type).getPrecision();
+     } else if (type instanceof ZonedTimestampType) {
+         return ((ZonedTimestampType) type).getPrecision();
+     } else {
+         throw new IllegalArgumentException("Unknown TIMESTAMP type: " + type);
+     }
+ }
+
+ /**
+  * Merges two types of which at least one is expected to be a DECIMAL.
+  *
+  * <p>Two decimals merge into a decimal that keeps the larger number of integer digits and the
+  * larger scale. A decimal and another exact numeric merge via {@code
+  * mergeExactNumericsIntoDecimal}. Any other combination yields STRING.
+  *
+  * @param lType first type
+  * @param rType second type
+  * @return the merged type
+  * @throws IllegalArgumentException if merging two decimals would need more precision digits
+  *     than {@code DecimalType.MAX_PRECISION}
+  */
+ @VisibleForTesting
+ static DataType mergeDecimalType(DataType lType, DataType rType) {
+     boolean leftIsDecimal = lType instanceof DecimalType;
+     boolean rightIsDecimal = rType instanceof DecimalType;
+
+     if (leftIsDecimal && rightIsDecimal) {
+         DecimalType left = (DecimalType) lType;
+         DecimalType right = (DecimalType) rType;
+
+         int integerDigits =
+                 Math.max(
+                         left.getPrecision() - left.getScale(),
+                         right.getPrecision() - right.getScale());
+         int scale = Math.max(left.getScale(), right.getScale());
+         int requiredPrecision = integerDigits + scale;
+
+         Preconditions.checkArgument(
+                 requiredPrecision <= DecimalType.MAX_PRECISION,
+                 String.format(
+                         "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available",
+                         lType,
+                         rType,
+                         requiredPrecision,
+                         DecimalType.MAX_PRECISION));
+         return DataTypes.DECIMAL(requiredPrecision, scale);
+     }
+
+     if (leftIsDecimal && rType.is(DataTypeFamily.EXACT_NUMERIC)) {
+         // Decimal on the left, integer-like type on the right.
+         return mergeExactNumericsIntoDecimal((DecimalType) lType, rType);
+     }
+
+     if (rightIsDecimal && lType.is(DataTypeFamily.EXACT_NUMERIC)) {
+         // Decimal on the right, integer-like type on the left.
+         return mergeExactNumericsIntoDecimal((DecimalType) rType, lType);
+     }
+
+     return DataTypes.STRING();
+ }
+
+ /**
+  * Widens a decimal type so it can also hold values of another exact numeric type.
+  *
+  * <p>The scale of {@code decimalType} is kept; the precision grows to at least the scale plus
+  * the numeric precision of {@code otherType}. If that exceeds {@code
+  * DecimalType.MAX_PRECISION}, STRING is returned instead.
+  */
+ private static DataType mergeExactNumericsIntoDecimal(
+         DecimalType decimalType, DataType otherType) {
+     int scale = decimalType.getScale();
+     int digitsForOther = scale + getNumericPrecision(otherType);
+     int resultPrecision = Math.max(decimalType.getPrecision(), digitsForOther);
+
+     if (resultPrecision > DecimalType.MAX_PRECISION) {
+         return DataTypes.STRING();
+     }
+     return DataTypes.DECIMAL(resultPrecision, decimalType.getScale());
+ }
+
+ /**
+  * Returns the number of decimal digits needed to represent any value of an exact numeric type.
+  *
+  * @param dataType an exact numeric type
+  * @return 3 for TINYINT, 5 for SMALLINT, 10 for INTEGER, 19 for BIGINT, or the declared
+  *     precision for DECIMAL
+  * @throws IllegalArgumentException if the type is not one of the types listed above
+  */
+ @VisibleForTesting
+ public static int getNumericPrecision(DataType dataType) {
+     if (dataType.is(DataTypeFamily.EXACT_NUMERIC)) {
+         if (dataType.is(DataTypeRoot.TINYINT)) {
+             return 3;
+         }
+         if (dataType.is(DataTypeRoot.SMALLINT)) {
+             return 5;
+         }
+         if (dataType.is(DataTypeRoot.INTEGER)) {
+             return 10;
+         }
+         if (dataType.is(DataTypeRoot.BIGINT)) {
+             return 19;
+         }
+         if (dataType.is(DataTypeRoot.DECIMAL)) {
+             return ((DecimalType) dataType).getPrecision();
+         }
+     }
+
+     throw new IllegalArgumentException(
+             "Failed to get precision of non-exact decimal type " + dataType);
+ }
+
+ @VisibleForTesting
+ public static Object coerceObject(
+ String timezone,
+ Object originalField,
+ DataType originalType,
+ DataType destinationType) {
+ if (originalField == null) {
+ return null;
+ }
+
+ if (destinationType instanceof BooleanType) {
+ return Boolean.valueOf(originalField.toString());
+ }
+
+ if (destinationType instanceof TinyIntType) {
+ return coerceToByte(originalField);
+ }
+
+ if (destinationType instanceof SmallIntType) {
+ return coerceToShort(originalField);
+ }
+
+ if (destinationType instanceof IntType) {
+ return coerceToInt(originalField);
+ }
+
+ if (destinationType instanceof BigIntType) {
+ return coerceToLong(originalField);
+ }
+
+ if (destinationType instanceof DecimalType) {
+ DecimalType decimalType = (DecimalType) destinationType;
+ return coerceToDecimal(
+ originalField, decimalType.getPrecision(), decimalType.getScale());
+ }
+
+ if (destinationType instanceof FloatType) {
+ return coerceToFloat(originalField);
+ }
+
+ if (destinationType instanceof DoubleType) {
+ return coerceToDouble(originalField);
+ }
+
+ if (destinationType instanceof CharType) {
+ return coerceToString(originalField, originalType);
+ }
+
+ if (destinationType instanceof VarCharType) {
+ return coerceToString(originalField, originalType);
+ }
+
+ if (destinationType instanceof BinaryType) {
+ return coerceToBytes(originalField);
+ }
+
+ if (destinationType instanceof VarBinaryType) {
+ return coerceToBytes(originalField);
+ }
+
+ if (destinationType instanceof DateType) {
+ return coerceToDate(originalField);
+ }
+
+ if (destinationType instanceof TimeType) {
+ return coerceToTime(originalField);
+ }
+
+ if (destinationType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE)
+ && originalType.is(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE)) {
+ // For now, TimestampData / ZonedTimestampData / LocalZonedTimestampData has no
+ // difference in its internal representation, so there's no need to do any precision
+ // conversion.
+ return originalField;
+ }
+
+ if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE)
+ && originalType.is(DataTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
+ return originalField;
+ }
+
+ if (destinationType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
+ && originalType.is(DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)) {
+ return originalField;
+ }
+
+ if (destinationType instanceof TimestampType) {
+ return coerceToTimestamp(originalField, timezone);
+ }
+
+ if (destinationType instanceof LocalZonedTimestampType) {
+ return coerceToLocalZonedTimestamp(originalField, timezone);
+ }
+
+ if (destinationType instanceof ZonedTimestampType) {
+ return coerceToZonedTimestamp(originalField, timezone);
+ }
+
+ throw new IllegalArgumentException(
+ String.format(
+ "Column type \"%s\" doesn't support type coercion to \"%s\"",
+ originalType, destinationType));
+ }
+
+ private static Object coerceToString(Object originalField, DataType originalType) {
+ if (originalField == null) {
+ return BinaryStringData.fromString("null");
+ }
+
+ if (originalField instanceof StringData) {
+ return originalField;
+ }
+
+ if (originalType instanceof DateType || originalType instanceof TimeType) {
+ return BinaryStringData.fromString(originalField.toString());
+ }
+
+ if (originalField instanceof byte[]) {
+ return BinaryStringData.fromString(hexlify((byte[]) originalField));
+ }
+
+ if (originalField instanceof Variant) {
+ return BinaryStringData.fromString(((Variant) originalField).toJson());
+ }
+
+ return BinaryStringData.fromString(originalField.toString());
+ }
+
+ /**
+  * Converts a field value into a byte array.
+  *
+  * <p>Byte arrays are returned as-is. Any other value is converted via {@code toString()} and
+  * encoded as UTF-8, so the result does not depend on the JVM's default charset.
+  */
+ private static Object coerceToBytes(Object originalField) {
+     if (originalField instanceof byte[]) {
+         return originalField;
+     }
+     return originalField.toString().getBytes(StandardCharsets.UTF_8);
+ }
+
+ /**
+  * Converts a field value to a TINYINT value. Only {@link Byte} inputs are accepted.
+  *
+  * @throws IllegalArgumentException if {@code o} is not a {@link Byte}
+  */
+ private static byte coerceToByte(Object o) {
+     if (!(o instanceof Byte)) {
+         throw new IllegalArgumentException(
+                 String.format("Cannot fit type \"%s\" into a TINYINT column. ", o.getClass()));
+     }
+     return (Byte) o;
+ }
+
+ /**
+  * Converts a field value to a SMALLINT value. Accepts {@link Byte} and {@link Short} inputs.
+  *
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static short coerceToShort(Object o) {
+     if (o instanceof Byte) {
+         return ((Byte) o).shortValue();
+     }
+     if (o instanceof Short) {
+         return (Short) o;
+     }
+     throw new IllegalArgumentException(
+             String.format(
+                     "Cannot fit type \"%s\" into a SMALLINT column. "
+                             + "Currently only TINYINT can be accepted by a SMALLINT column",
+                     o.getClass()));
+ }
+
+ /**
+  * Converts a field value to an INT value. Accepts {@link Byte}, {@link Short} and {@link
+  * Integer} inputs.
+  *
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static int coerceToInt(Object o) {
+     if (o instanceof Byte) {
+         return ((Byte) o).intValue();
+     }
+     if (o instanceof Short) {
+         return ((Short) o).intValue();
+     }
+     if (o instanceof Integer) {
+         return (Integer) o;
+     }
+     throw new IllegalArgumentException(
+             String.format(
+                     "Cannot fit type \"%s\" into a INT column. "
+                             + "Currently only TINYINT / SMALLINT can be accepted by a INT column",
+                     o.getClass()));
+ }
+
+ /**
+  * Converts a field value to a BIGINT value. Accepts {@link Byte}, {@link Short}, {@link
+  * Integer} and {@link Long} inputs.
+  *
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static long coerceToLong(Object o) {
+     if (o instanceof Byte) {
+         return ((Byte) o).longValue();
+     }
+     if (o instanceof Short) {
+         return ((Short) o).longValue();
+     }
+     if (o instanceof Integer) {
+         return ((Integer) o).longValue();
+     }
+     if (o instanceof Long) {
+         return (Long) o;
+     }
+     throw new IllegalArgumentException(
+             String.format(
+                     "Cannot fit type \"%s\" into a BIGINT column. "
+                             + "Currently only TINYINT / SMALLINT / INT can be accepted by a BIGINT column",
+                     o.getClass()));
+ }
+
+ private static DecimalData coerceToDecimal(Object o, int precision, int scale) {
+ BigDecimal decimalValue;
+ if (o instanceof Byte) {
+ decimalValue = BigDecimal.valueOf(((Byte) o).longValue(), 0);
+ } else if (o instanceof Short) {
+ decimalValue = BigDecimal.valueOf(((Short) o).longValue(), 0);
+ } else if (o instanceof Integer) {
+ decimalValue = BigDecimal.valueOf(((Integer) o).longValue(), 0);
+ } else if (o instanceof Long) {
+ decimalValue = BigDecimal.valueOf((Long) o, 0);
+ } else if (o instanceof DecimalData) {
+ decimalValue = ((DecimalData) o).toBigDecimal();
+ } else {
+ throw new IllegalArgumentException(
+ String.format(
+ "Cannot fit type \"%s\" into a DECIMAL column. "
+ + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL can be accepted by a DECIMAL column",
+ o.getClass()));
+ }
+ return decimalValue != null
+ ? DecimalData.fromBigDecimal(decimalValue, precision, scale)
+ : null;
+ }
+
+ /**
+  * Converts a field value to a FLOAT value.
+  *
+  * <p>Accepts {@link Byte}, {@link Short}, {@link Integer}, {@link Long}, {@link DecimalData}
+  * and {@link Float} inputs.
+  *
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static float coerceToFloat(Object o) {
+     if (o instanceof Byte || o instanceof Short || o instanceof Integer || o instanceof Long) {
+         return ((Number) o).floatValue();
+     }
+     if (o instanceof DecimalData) {
+         return ((DecimalData) o).toBigDecimal().floatValue();
+     }
+     if (o instanceof Float) {
+         return (Float) o;
+     }
+     throw new IllegalArgumentException(
+             String.format(
+                     "Cannot fit type \"%s\" into a FLOAT column. "
+                             + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL can be accepted by a FLOAT column",
+                     o.getClass()));
+ }
+
+ /**
+  * Converts a field value to a DOUBLE value.
+  *
+  * <p>Accepts {@link Byte}, {@link Short}, {@link Integer}, {@link Long}, {@link DecimalData},
+  * {@link Float} and {@link Double} inputs.
+  *
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static double coerceToDouble(Object o) {
+     if (o instanceof Byte
+             || o instanceof Short
+             || o instanceof Integer
+             || o instanceof Long
+             || o instanceof Float) {
+         return ((Number) o).doubleValue();
+     }
+     if (o instanceof DecimalData) {
+         return ((DecimalData) o).toBigDecimal().doubleValue();
+     }
+     if (o instanceof Double) {
+         return (Double) o;
+     }
+     throw new IllegalArgumentException(
+             String.format(
+                     "Cannot fit type \"%s\" into a DOUBLE column. "
+                             + "Currently only TINYINT / SMALLINT / INT / BIGINT / DECIMAL / FLOAT can be accepted by a DOUBLE column",
+                     o.getClass()));
+ }
+
+ /**
+  * Converts a field value to date data.
+  *
+  * <p>{@link DateData} is returned as-is; a {@link Number} is read as an epoch-day count; a
+  * {@link String} is parsed via {@code DateData.fromIsoLocalDateString}; {@link LocalDate} is
+  * wrapped directly and {@link LocalDateTime} contributes its date part.
+  *
+  * @return the date data, or {@code null} for a {@code null} input
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static DateData coerceToDate(Object o) {
+     if (o == null) {
+         return null;
+     } else if (o instanceof DateData) {
+         return (DateData) o;
+     } else if (o instanceof Number) {
+         return DateData.fromEpochDay(((Number) o).intValue());
+     } else if (o instanceof String) {
+         return DateData.fromIsoLocalDateString((String) o);
+     } else if (o instanceof LocalDate) {
+         return DateData.fromLocalDate((LocalDate) o);
+     } else if (o instanceof LocalDateTime) {
+         LocalDate datePart = ((LocalDateTime) o).toLocalDate();
+         return DateData.fromLocalDate(datePart);
+     } else {
+         throw new IllegalArgumentException(
+                 String.format("Cannot fit type \"%s\" into a DATE column. ", o.getClass()));
+     }
+ }
+
+ /**
+  * Converts a field value to time data.
+  *
+  * <p>{@link TimeData} is returned as-is; a {@link Number} is passed to {@code
+  * TimeData.fromNanoOfDay}; a {@link String} is parsed via {@code
+  * TimeData.fromIsoLocalTimeString}; {@link LocalTime} is wrapped directly and {@link
+  * LocalDateTime} contributes its time part.
+  *
+  * @return the time data, or {@code null} for a {@code null} input
+  * @throws IllegalArgumentException if {@code o} has any other type
+  */
+ private static TimeData coerceToTime(Object o) {
+     if (o == null) {
+         return null;
+     } else if (o instanceof TimeData) {
+         return (TimeData) o;
+     } else if (o instanceof Number) {
+         return TimeData.fromNanoOfDay(((Number) o).longValue());
+     } else if (o instanceof String) {
+         return TimeData.fromIsoLocalTimeString((String) o);
+     } else if (o instanceof LocalTime) {
+         return TimeData.fromLocalTime((LocalTime) o);
+     } else if (o instanceof LocalDateTime) {
+         LocalTime timePart = ((LocalDateTime) o).toLocalTime();
+         return TimeData.fromLocalTime(timePart);
+     } else {
+         throw new IllegalArgumentException(
+                 String.format("Cannot fit type \"%s\" into a TIME column. ", o.getClass()));
+     }
+ }
+
+ /**
+  * Converts a field value to timestamp data (without time zone).
+  *
+  * <p>A {@link Long} is read as an epoch-day count and mapped to the start of that day.
+  * Zoned and local-zoned timestamps are converted to the local date-time of their instant in
+  * the given {@code timezone}. {@link TimestampData} is returned as-is, and {@link DateData}
+  * is mapped to the start of its day.
+  *
+  * @param object value to convert
+  * @param timezone zone ID string used for instant-based inputs
+  * @return the timestamp data, or {@code null} for a {@code null} input
+  * @throws IllegalArgumentException if {@code object} has any other type
+  */
+ private static TimestampData coerceToTimestamp(Object object, String timezone) {
+     if (object == null) {
+         return null;
+     }
+     if (object instanceof Long) {
+         LocalDate epochDay = LocalDate.ofEpochDay((long) object);
+         return TimestampData.fromLocalDateTime(epochDay.atStartOfDay());
+     }
+     if (object instanceof LocalZonedTimestampData) {
+         LocalZonedTimestampData localZoned = (LocalZonedTimestampData) object;
+         return TimestampData.fromLocalDateTime(
+                 LocalDateTime.ofInstant(localZoned.toInstant(), ZoneId.of(timezone)));
+     }
+     if (object instanceof ZonedTimestampData) {
+         ZonedTimestampData zoned = (ZonedTimestampData) object;
+         return TimestampData.fromLocalDateTime(
+                 LocalDateTime.ofInstant(zoned.toInstant(), ZoneId.of(timezone)));
+     }
+     if (object instanceof TimestampData) {
+         return (TimestampData) object;
+     }
+     if (object instanceof DateData) {
+         LocalDate date = ((DateData) object).toLocalDate();
+         return TimestampData.fromLocalDateTime(date.atStartOfDay());
+     }
+     throw new IllegalArgumentException(
+             String.format("Unable to implicitly coerce object `%s` as a TIMESTAMP.", object));
+ }
+
+ /**
+  * Converts a field value to local-zoned timestamp data.
+  *
+  * <p>The value is first coerced with {@code coerceToTimestamp}; the millisecond and
+  * nano-of-millisecond parts of that intermediate result are then reused as the epoch-based
+  * fields of the returned value.
+  *
+  * @return the local-zoned timestamp data, or {@code null} for a {@code null} input
+  */
+ private static LocalZonedTimestampData coerceToLocalZonedTimestamp(
+         Object object, String timezone) {
+     if (object == null) {
+         return null;
+     }
+
+     TimestampData intermediate = coerceToTimestamp(object, timezone);
+     long millis = intermediate.getMillisecond();
+     int nanoOfMilli = intermediate.getNanoOfMillisecond();
+     return LocalZonedTimestampData.fromEpochMillis(millis, nanoOfMilli);
+ }
+
+ /**
+  * Converts a field value to zoned timestamp data.
+  *
+  * <p>The value is first coerced with {@code coerceToTimestamp}; its local date-time is then
+  * read as a UTC instant and expressed in the given {@code timezone}.
+  *
+  * @return the zoned timestamp data, or {@code null} for a {@code null} input
+  */
+ private static ZonedTimestampData coerceToZonedTimestamp(Object object, String timezone) {
+     if (object == null) {
+         return null;
+     }
+
+     LocalDateTime localDateTime = coerceToTimestamp(object, timezone).toLocalDateTime();
+     ZonedDateTime zonedDateTime =
+             ZonedDateTime.ofInstant(
+                     localDateTime.toInstant(ZoneOffset.UTC), ZoneId.of(timezone));
+     return ZonedTimestampData.fromZonedDateTime(zonedDateTime);
+ }
+
+ /**
+  * Encodes a byte array as text. NOTE(review): despite the method name, the encoding used is
+  * Base64, not hexadecimal -- confirm whether the name or the encoding is the intended one.
+  */
+ private static String hexlify(byte[] bytes) {
+     BaseEncoding encoding = BaseEncoding.base64();
+     return encoding.encode(bytes);
+ }
+
+ private static final Map, List> TYPE_MERGING_TREE =
+ getTypeMergingTree();
+
+ private static Map, List> getTypeMergingTree() {
+ DataType stringType = DataTypes.STRING();
+ DataType doubleType = DataTypes.DOUBLE();
+ DataType floatType = DataTypes.FLOAT();
+ DataType decimalType =
+ DataTypes.DECIMAL(DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE);
+ DataType bigIntType = DataTypes.BIGINT();
+ DataType intType = DataTypes.INT();
+ DataType smallIntType = DataTypes.SMALLINT();
+ DataType tinyIntType = DataTypes.TINYINT();
+ DataType timestampTzType = DataTypes.TIMESTAMP_TZ(ZonedTimestampType.MAX_PRECISION);
+ DataType timestampLtzType = DataTypes.TIMESTAMP_LTZ(LocalZonedTimestampType.MAX_PRECISION);
+ DataType timestampType = DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION);
+ DataType dateType = DataTypes.DATE();
+
+ Map, List> mergingTree = new HashMap<>();
+
+ // Simple data types
+ mergingTree.put(VarCharType.class, ImmutableList.of(stringType));
+ mergingTree.put(CharType.class, ImmutableList.of(stringType));
+ mergingTree.put(BooleanType.class, ImmutableList.of(stringType));
+ mergingTree.put(BinaryType.class, ImmutableList.of(stringType));
+ mergingTree.put(VarBinaryType.class, ImmutableList.of(stringType));
+ mergingTree.put(DoubleType.class, ImmutableList.of(doubleType, stringType));
+ mergingTree.put(FloatType.class, ImmutableList.of(floatType, doubleType, stringType));
+ mergingTree.put(DecimalType.class, ImmutableList.of(stringType));
+ mergingTree.put(
+ BigIntType.class,
+ ImmutableList.of(bigIntType, decimalType, doubleType, stringType));
+ mergingTree.put(
+ IntType.class,
+ ImmutableList.of(intType, bigIntType, decimalType, doubleType, stringType));
+ mergingTree.put(
+ SmallIntType.class,
+ ImmutableList.of(
+ smallIntType,
+ intType,
+ bigIntType,
+ decimalType,
+ floatType,
+ doubleType,
+ stringType));
+ mergingTree.put(
+ TinyIntType.class,
+ ImmutableList.of(
+ tinyIntType,
+ smallIntType,
+ intType,
+ bigIntType,
+ decimalType,
+ floatType,
+ doubleType,
+ stringType));
+
+ // Timestamp series
+ mergingTree.put(ZonedTimestampType.class, ImmutableList.of(timestampTzType, stringType));
+ mergingTree.put(
+ LocalZonedTimestampType.class,
+ ImmutableList.of(timestampLtzType, timestampTzType, stringType));
+ mergingTree.put(
+ TimestampType.class,
+ ImmutableList.of(timestampType, timestampLtzType, timestampTzType, stringType));
+ mergingTree.put(
+ DateType.class,
+ ImmutableList.of(
+ dateType, timestampType, timestampLtzType, timestampTzType, stringType));
+ mergingTree.put(TimeType.class, ImmutableList.of(stringType));
+
+ // Complex types
+ mergingTree.put(RowType.class, ImmutableList.of(stringType));
+ mergingTree.put(ArrayType.class, ImmutableList.of(stringType));
+ mergingTree.put(MapType.class, ImmutableList.of(stringType));
+ mergingTree.put(VariantType.class, ImmutableList.of(stringType));
+ return mergingTree;
+ }
+}
diff --git a/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/factories/FactoryHelperTests.java b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/factories/FactoryHelperTests.java
new file mode 100644
index 00000000000..3c2c82af1cd
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/factories/FactoryHelperTests.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.factories;
+
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.common.configuration.Configuration;
+import org.apache.flink.table.api.ValidationException;
+
+import org.apache.flink.shaded.guava33.com.google.common.collect.Sets;
+
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/** Tests for the option validation performed by {@link FactoryHelper}. */
+class FactoryHelperTests {
+
+    private Factory getDummyFactory() {
+        // Required: id, name, age. Optional: hobby, location, misc.
+        return new Factory() {
+            @Override
+            public String identifier() {
+                return "dummy";
+            }
+
+            @Override
+            public Set<ConfigOption<?>> requiredOptions() {
+                return Sets.newHashSet(
+                        ConfigOptions.key("id")
+                                .intType()
+                                .noDefaultValue()
+                                .withFallbackKeys("id_fallback"),
+                        ConfigOptions.key("name").stringType().noDefaultValue(),
+                        ConfigOptions.key("age").doubleType().noDefaultValue());
+            }
+
+            @Override
+            public Set<ConfigOption<?>> optionalOptions() {
+                return Sets.newHashSet(
+                        ConfigOptions.key("hobby")
+                                .stringType()
+                                .noDefaultValue()
+                                .withFallbackKeys("hobby_fallback"),
+                        ConfigOptions.key("location").stringType().defaultValue("Everywhere"),
+                        ConfigOptions.key("misc")
+                                .mapType()
+                                .defaultValue(Collections.singletonMap("A", "Z")));
+            }
+        };
+    }
+
+    @Test
+    void testCorrectConfigValidation() {
+        // Every required option is present and every value parses as its declared type.
+        Map<String, String> configurations = new HashMap<>();
+        configurations.put("id", "1");
+        configurations.put("name", "Alice");
+        configurations.put("age", "17");
+        configurations.put("location", "Here");
+
+        FactoryHelper factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+
+        factoryHelper.validate();
+
+        // The same holds when "id" and "hobby" are supplied through their fallback keys.
+        configurations.clear();
+        configurations.put("id_fallback", "2");
+        configurations.put("name", "Bob");
+        configurations.put("age", "18");
+        configurations.put("hobby_fallback", "Swimming");
+        factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+        factoryHelper.validate();
+    }
+
+    @Test
+    void testMissingRequiredOptionConfigValidation() {
+        // "name" is a required option but is absent from this configuration.
+        Map<String, String> configurations = new HashMap<>();
+        configurations.put("id", "1");
+        configurations.put("age", "17");
+        configurations.put("location", "Here");
+
+        FactoryHelper factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+
+        Assertions.assertThatThrownBy(factoryHelper::validate)
+                .isExactlyInstanceOf(ValidationException.class)
+                .hasMessageContaining("One or more required options are missing.");
+    }
+
+    @Test
+    void testIncompatibleTypeValidation() {
+        // "age" is declared as a double option, so a non-numeric value must be rejected.
+        Map<String, String> configurations = new HashMap<>();
+        configurations.put("id", "1");
+        configurations.put("name", "Alice");
+        configurations.put("age", "Not a number");
+        configurations.put("location", "Here");
+
+        FactoryHelper factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+
+        Assertions.assertThatThrownBy(factoryHelper::validate)
+                .isExactlyInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("Could not parse value 'Not a number' for key 'age'.");
+    }
+
+    @Test
+    void testRedundantConfigValidation() {
+        // "what" is declared neither as a required nor as an optional option.
+        Map<String, String> configurations = new HashMap<>();
+        configurations.put("id", "1");
+        configurations.put("name", "Alice");
+        configurations.put("age", "17");
+        configurations.put("what", "Not a valid configOption");
+
+        FactoryHelper factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+
+        Assertions.assertThatThrownBy(factoryHelper::validate)
+                .isExactlyInstanceOf(ValidationException.class)
+                .hasMessageContaining("Unsupported options found for 'dummy'.");
+    }
+
+    @Test
+    void testAllowedPrefixConfigValidation() {
+        // Keys under "debezium." and "canal." are not declared by the dummy factory.
+        Map<String, String> configurations = new HashMap<>();
+        configurations.put("id", "1");
+        configurations.put("name", "Alice");
+        configurations.put("age", "17");
+        configurations.put("debezium.foo", "Some debezium options");
+        configurations.put("debezium.bar", "Another debezium options");
+        configurations.put("canal.baz", "Yet another debezium options");
+
+        FactoryHelper factoryHelper =
+                FactoryHelper.createFactoryHelper(
+                        getDummyFactory(),
+                        new FactoryHelper.DefaultContext(
+                                Configuration.fromMap(configurations), null, null));
+        // Plain validation rejects the undeclared keys.
+        Assertions.assertThatThrownBy(factoryHelper::validate)
+                .isExactlyInstanceOf(ValidationException.class)
+                .hasMessageContaining("Unsupported options found for 'dummy'.");
+        // Exempting only one of the two prefixes still leaves undeclared keys behind.
+        Assertions.assertThatThrownBy(() -> factoryHelper.validateExcept("debezium."))
+                .isExactlyInstanceOf(ValidationException.class)
+                .hasMessageContaining("Unsupported options found for 'dummy'.");
+
+        Assertions.assertThatThrownBy(() -> factoryHelper.validateExcept("canal."))
+                .isExactlyInstanceOf(ValidationException.class)
+                .hasMessageContaining("Unsupported options found for 'dummy'.");
+        // Exempting both prefixes lets validation pass.
+        factoryHelper.validateExcept("debezium.", "canal.");
+    }
+}
diff --git a/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/schema/SelectorsTest.java b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/schema/SelectorsTest.java
new file mode 100644
index 00000000000..c2ada3b191b
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/schema/SelectorsTest.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.schema;
+
+import org.apache.flink.cdc.common.event.TableId;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Tests for table-id matching in {@link org.apache.flink.cdc.common.schema.Selectors}. */
+class SelectorsTest {
+
+    @Test
+    void testTableSelector() {
+        // As the expectations below show, '.' separates name parts and "\\." is the regex wildcard.
+        // Three-part patterns: nameSpace.schemaName.tableName
+        Selectors selectors =
+                new Selectors.SelectorsBuilder()
+                        .includeTables("db.sc1.A[0-9]+,db.sc2.B[0-1]+,db.sc1.sc1")
+                        .build();
+
+        assertAllowed(selectors, "db", "sc1", "sc1");
+        assertAllowed(selectors, "db", "sc1", "A1");
+        assertAllowed(selectors, "db", "sc1", "A2");
+        assertAllowed(selectors, "db", "sc2", "B0");
+        assertAllowed(selectors, "db", "sc2", "B1");
+        assertNotAllowed(selectors, "db", "sc1", "A");
+        assertNotAllowed(selectors, "db", "sc1a", "B");
+        assertNotAllowed(selectors, "db", "sc1", "AA");
+        assertNotAllowed(selectors, "db", "sc2", "B2");
+        assertNotAllowed(selectors, "db2", "sc1", "A1");
+        assertNotAllowed(selectors, "db2", "sc1", "A2");
+        assertNotAllowed(selectors, "db", "sc11", "A1");
+        assertNotAllowed(selectors, "db", "sc1A", "A1");
+
+        selectors =
+                new Selectors.SelectorsBuilder()
+                        .includeTables("db\\..sc1.A[0-9]+,db.sc2.B[0-1]+,db\\..sc1.sc1,db.sc1.sc1")
+                        .build();
+
+        assertAllowed(selectors, "db", "sc1", "sc1");
+        assertAllowed(selectors, "db1", "sc1", "sc1");
+        assertAllowed(selectors, "dba", "sc1", "sc1");
+        assertAllowed(selectors, "db1", "sc1", "A1");
+        assertAllowed(selectors, "dba", "sc1", "A2");
+        assertAllowed(selectors, "db", "sc2", "B0");
+        assertAllowed(selectors, "db", "sc2", "B1");
+        assertNotAllowed(selectors, "db", "sc1", "A");
+        assertNotAllowed(selectors, "db", "sc1a", "B");
+        assertNotAllowed(selectors, "db", "sc1", "AA");
+        assertNotAllowed(selectors, "db", "sc2", "B2");
+        assertNotAllowed(selectors, "dba1", "sc1", "A1");
+        assertNotAllowed(selectors, "dba2", "sc1", "A2");
+        assertNotAllowed(selectors, "db", "sc11", "A1");
+        assertNotAllowed(selectors, "db", "sc1A", "A1");
+
+        // Two-part patterns: schemaName.tableName
+        selectors =
+                new Selectors.SelectorsBuilder()
+                        .includeTables("sc1.A[0-9]+,sc2.B[0-1]+,sc1.sc1")
+                        .build();
+
+        assertAllowed(selectors, null, "sc1", "sc1");
+        assertAllowed(selectors, null, "sc1", "A1");
+        assertAllowed(selectors, null, "sc1", "A2");
+        assertAllowed(selectors, null, "sc2", "B0");
+        assertAllowed(selectors, null, "sc2", "B1");
+        assertNotAllowed(selectors, "db", "sc1", "A1");
+        assertNotAllowed(selectors, null, "sc1", "A");
+        assertNotAllowed(selectors, null, "sc2", "B");
+        assertNotAllowed(selectors, null, "sc1", "AA");
+        assertNotAllowed(selectors, null, "sc11", "A1");
+        assertNotAllowed(selectors, null, "sc1A", "A1");
+
+        // One-part patterns: tableName only
+        selectors =
+                new Selectors.SelectorsBuilder().includeTables("\\.A[0-9]+,B[0-1]+,sc1").build();
+
+        assertAllowed(selectors, null, null, "sc1");
+        assertNotAllowed(selectors, "db", "sc1", "sc1");
+        assertNotAllowed(selectors, null, "sc1", "sc1");
+        assertAllowed(selectors, null, null, "1A1");
+        assertAllowed(selectors, null, null, "AA2");
+        assertAllowed(selectors, null, null, "B0");
+        assertAllowed(selectors, null, null, "B1");
+        assertNotAllowed(selectors, "db", "sc1", "A1");
+        assertNotAllowed(selectors, null, null, "A");
+        assertNotAllowed(selectors, null, null, "B");
+        assertNotAllowed(selectors, null, null, "2B");
+        // Two-part patterns again, additionally probing "B2", which lies outside [0-1].
+        selectors =
+                new Selectors.SelectorsBuilder()
+                        .includeTables("sc1.A[0-9]+,sc2.B[0-1]+,sc1.sc1")
+                        .build();
+
+        assertAllowed(selectors, null, "sc1", "sc1");
+        assertAllowed(selectors, null, "sc1", "A1");
+        assertAllowed(selectors, null, "sc1", "A2");
+        assertAllowed(selectors, null, "sc2", "B0");
+        assertAllowed(selectors, null, "sc2", "B1");
+        assertNotAllowed(selectors, "db", "sc1", "A1");
+        assertNotAllowed(selectors, null, "sc1", "A");
+        assertNotAllowed(selectors, null, "sc1", "AA");
+        assertNotAllowed(selectors, null, "sc2", "B");
+        assertNotAllowed(selectors, null, "sc2", "B2");
+        assertNotAllowed(selectors, null, "sc11", "A1");
+        assertNotAllowed(selectors, null, "sc1A", "A1");
+
+        selectors = new Selectors.SelectorsBuilder().includeTables("sc1.sc1").build();
+        assertAllowed(selectors, null, "sc1", "sc1");
+
+        selectors = new Selectors.SelectorsBuilder().includeTables("sc1.sc[0-9]+").build();
+        assertAllowed(selectors, null, "sc1", "sc1");
+
+        selectors = new Selectors.SelectorsBuilder().includeTables("sc1.\\.*").build();
+        assertAllowed(selectors, null, "sc1", "sc1");
+    }
+
+    protected void assertAllowed(
+            Selectors filter, String nameSpace, String schemaName, String tableName) {
+        // Null name parts are left out of the table id, see getTableId.
+        TableId id = getTableId(nameSpace, schemaName, tableName);
+
+        assertThat(filter.isMatch(id)).isTrue();
+    }
+
+    protected void assertNotAllowed(
+            Selectors filter, String nameSpace, String schemaName, String tableName) {
+        // Null name parts are left out of the table id, see getTableId.
+        TableId id = getTableId(nameSpace, schemaName, tableName);
+
+        assertThat(filter.isMatch(id)).isFalse();
+    }
+
+    private static TableId getTableId(String nameSpace, String schemaName, String tableName) {
+        // A non-null namespace always produces the three-part id.
+        if (nameSpace != null) {
+            return TableId.tableId(nameSpace, schemaName, tableName);
+        }
+        if (schemaName != null) {
+            return TableId.tableId(schemaName, tableName);
+        }
+        // Neither namespace nor schema was given: the id is the table name alone.
+        return TableId.tableId(tableName);
+    }
+}
diff --git a/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilderTest.java b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilderTest.java
new file mode 100644
index 00000000000..b3667d0e50d
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantInternalBuilderTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.variant;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class BinaryVariantInternalBuilderTest {
+
+    @Test
+    void testParseScalarJson() throws IOException {
+        assertThat(BinaryVariantInternalBuilder.parseJson("1", false).getByte())
+                .isEqualTo((byte) 1);
+        short int16 = (short) (Byte.MAX_VALUE + 1L);
+        assertThat(BinaryVariantInternalBuilder.parseJson(String.valueOf(int16), false).getShort())
+                .isEqualTo(int16);
+        int int32 = (int) (Short.MAX_VALUE + 1L);
+        assertThat(BinaryVariantInternalBuilder.parseJson(String.valueOf(int32), false).getInt())
+                .isEqualTo(int32);
+        long int64 = Integer.MAX_VALUE + 1L;
+        assertThat(BinaryVariantInternalBuilder.parseJson(String.valueOf(int64), false).getLong())
+                .isEqualTo(int64);
+        // One past Long.MAX_VALUE is read back through getDecimal().
+        BigDecimal beyondLong = BigDecimal.valueOf(Long.MAX_VALUE).add(BigDecimal.ONE);
+        assertThat(
+                        BinaryVariantInternalBuilder.parseJson(beyondLong.toPlainString(), false)
+                                .getDecimal())
+                .isEqualTo(beyondLong);
+        // "1.123" is read back as a decimal, Double.MAX_VALUE as a double.
+        assertThat(BinaryVariantInternalBuilder.parseJson("1.123", false).getDecimal())
+                .isEqualTo(BigDecimal.valueOf(1.123));
+        assertThat(
+                        BinaryVariantInternalBuilder.parseJson(
+                                        String.valueOf(Double.MAX_VALUE), false)
+                                .getDouble())
+                .isEqualTo(Double.MAX_VALUE);
+        // Strings, booleans and the null literal.
+        assertThat(BinaryVariantInternalBuilder.parseJson("\"hello\"", false).getString())
+                .isEqualTo("hello");
+
+        assertThat(BinaryVariantInternalBuilder.parseJson("true", false).getBoolean()).isTrue();
+
+        assertThat(BinaryVariantInternalBuilder.parseJson("false", false).getBoolean()).isFalse();
+
+        assertThat(BinaryVariantInternalBuilder.parseJson("null", false).isNull()).isTrue();
+    }
+
+    @Test
+    void testParseJsonArray() throws IOException {
+        BinaryVariant parsed = BinaryVariantInternalBuilder.parseJson("[]", false);
+        assertThat(parsed.getElement(0)).isNull();
+        // Flat array mixing element types.
+        parsed = BinaryVariantInternalBuilder.parseJson("[1,\"hello\",3.1, null]", false);
+        assertThat(parsed.getElement(0).getByte()).isEqualTo((byte) 1);
+        assertThat(parsed.getElement(1).getString()).isEqualTo("hello");
+        assertThat(parsed.getElement(2).getDecimal()).isEqualTo(BigDecimal.valueOf(3.1));
+        assertThat(parsed.getElement(3).isNull()).isTrue();
+        // Arrays nested inside arrays.
+        parsed = BinaryVariantInternalBuilder.parseJson("[1,[\"hello\",[3.1]]]", false);
+        assertThat(parsed.getElement(0).getByte()).isEqualTo((byte) 1);
+        assertThat(parsed.getElement(1).getElement(0).getString()).isEqualTo("hello");
+        assertThat(parsed.getElement(1).getElement(1).getElement(0).getDecimal())
+                .isEqualTo(BigDecimal.valueOf(3.1));
+    }
+
+    @Test
+    void testParseJsonObject() throws IOException {
+        BinaryVariant parsed = BinaryVariantInternalBuilder.parseJson("{}", false);
+        assertThat(parsed.getField("a")).isNull();
+        // Flat object mixing value types.
+        parsed =
+                BinaryVariantInternalBuilder.parseJson(
+                        "{\"a\":1,\"b\":\"hello\",\"c\":3.1}", false);
+
+        assertThat(parsed.getField("a").getByte()).isEqualTo((byte) 1);
+        assertThat(parsed.getField("b").getString()).isEqualTo("hello");
+        assertThat(parsed.getField("c").getDecimal()).isEqualTo(BigDecimal.valueOf(3.1));
+        // An object and an array nested inside an object.
+        parsed =
+                BinaryVariantInternalBuilder.parseJson(
+                        "{\"a\":1,\"b\":{\"c\":\"hello\",\"d\":[3.1]}}", false);
+        assertThat(parsed.getField("a").getByte()).isEqualTo((byte) 1);
+        assertThat(parsed.getField("b").getField("c").getString()).isEqualTo("hello");
+        assertThat(parsed.getField("b").getField("d").getElement(0).getDecimal())
+                .isEqualTo(BigDecimal.valueOf(3.1));
+        // With the second argument false, a repeated key is rejected ...
+        assertThatThrownBy(
+                        () ->
+                                BinaryVariantInternalBuilder.parseJson(
+                                        "{\"k1\":1,\"k1\":2,\"k2\":1.5}", false))
+                .isInstanceOf(VariantTypeException.class)
+                .hasMessage("VARIANT_DUPLICATE_KEY");
+        // ... and with it true, the later value (2) is the one read back for "k1".
+        parsed = BinaryVariantInternalBuilder.parseJson("{\"k1\":1,\"k1\":2,\"k2\":1.5}", true);
+        assertThat(parsed.getField("k1").getByte()).isEqualTo((byte) 2);
+        assertThat(parsed.getField("k2").getDecimal()).isEqualTo(BigDecimal.valueOf(1.5));
+    }
+}
diff --git a/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantTest.java b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantTest.java
new file mode 100644
index 00000000000..41726d14a0c
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/types/variant/BinaryVariantTest.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.types.variant;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.math.BigDecimal;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.temporal.ChronoUnit;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class BinaryVariantTest {
+
+ private BinaryVariantBuilder builder;
+
+ @BeforeEach
+ void setUp() {
+ builder = new BinaryVariantBuilder();
+ }
+
+ @Test
+ void testScalarVariant() {
+
+ assertThat(builder.of((byte) 10).isPrimitive()).isTrue();
+ assertThat(builder.of((byte) 10).isNull()).isFalse();
+ assertThat(builder.of((byte) 10).isArray()).isFalse();
+ assertThat(builder.of((byte) 10).isObject()).isFalse();
+ assertThat(builder.of((byte) 10).getType()).isEqualTo(Variant.Type.TINYINT);
+
+ assertThat(builder.of((byte) 10).getByte()).isEqualTo((byte) 10);
+ assertThat(builder.of((byte) 10).get()).isEqualTo((byte) 10);
+ assertThat((byte) builder.of((byte) 10).getAs()).isEqualTo((byte) 10);
+
+ assertThat(builder.of((short) 10).getShort()).isEqualTo((short) 10);
+ assertThat(builder.of((short) 10).get()).isEqualTo((short) 10);
+
+ assertThat(builder.of(10).getInt()).isEqualTo(10);
+ assertThat(builder.of(10).get()).isEqualTo(10);
+
+ assertThat(builder.of(10L).getLong()).isEqualTo(10L);
+ assertThat(builder.of(10L).get()).isEqualTo(10L);
+
+ assertThat(builder.of(10.0).getDouble()).isEqualTo(10.0d);
+ assertThat(builder.of(10.0).get()).isEqualTo(10.0d);
+
+ assertThat(builder.of(10.0f).getFloat()).isEqualTo(10.0f);
+ assertThat(builder.of(10.0f).get()).isEqualTo(10.0f);
+
+ assertThat(builder.of("hello").getString()).isEqualTo("hello");
+ assertThat(builder.of("hello").get()).isEqualTo("hello");
+
+ assertThat(builder.of("hello".getBytes()).getBytes()).isEqualTo("hello".getBytes());
+ assertThat(builder.of("hello".getBytes()).get()).isEqualTo("hello".getBytes());
+
+ assertThat(builder.of(true).getBoolean()).isTrue();
+ assertThat(builder.of(true).get()).isEqualTo(true);
+
+ assertThat(builder.of(BigDecimal.valueOf(100)).getDecimal())
+ .isEqualByComparingTo(BigDecimal.valueOf(100));
+ assertThat((BigDecimal) builder.of(BigDecimal.valueOf(100)).get())
+ .isEqualByComparingTo(BigDecimal.valueOf(100));
+
+ Instant instant = Instant.now().truncatedTo(ChronoUnit.MICROS);
+ assertThat(builder.of(instant).getInstant()).isEqualTo(instant);
+ assertThat(builder.of(instant).get()).isEqualTo(instant);
+
+ LocalDateTime localDateTime = LocalDateTime.now().truncatedTo(ChronoUnit.MICROS);
+ assertThat(builder.of(localDateTime).getDateTime()).isEqualTo(localDateTime);
+ assertThat(builder.of(localDateTime).get()).isEqualTo(localDateTime);
+
+ LocalDate localDate = LocalDate.now();
+ assertThat(builder.of(localDate).getDate()).isEqualTo(localDate);
+ assertThat(builder.of(localDate).get()).isEqualTo(localDate);
+
+ assertThat(builder.ofNull().get()).isEqualTo(null);
+ assertThat(builder.ofNull().isNull()).isTrue();
+ }
+
+ @Test
+ void testArrayVariant() {
+ Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS);
+ Variant variant =
+ builder.array()
+ .add(builder.of(1))
+ .add(builder.of("hello"))
+ .add(builder.of(now))
+ .add(builder.array().add(builder.of("hello2")).add(builder.of(10f)).build())
+ .add(builder.ofNull())
+ .build();
+
+ assertThat(variant.isArray()).isTrue();
+ assertThat(variant.isPrimitive()).isFalse();
+ assertThat(variant.isObject()).isFalse();
+ assertThat(variant.getType()).isEqualTo(Variant.Type.ARRAY);
+
+ assertThat(variant.getElement(-1)).isNull();
+ assertThat(variant.getElement(0).getInt()).isEqualTo(1);
+ assertThat(variant.getElement(1).getString()).isEqualTo("hello");
+ assertThat(variant.getElement(2).getInstant()).isEqualTo(now);
+ assertThat(variant.getElement(3).getElement(0).getString()).isEqualTo("hello2");
+ assertThat(variant.getElement(3).getElement(1).getFloat()).isEqualTo(10f);
+ assertThat(variant.getElement(4).isNull()).isTrue();
+ assertThat(variant.getElement(5)).isNull();
+ }
+
+ @Test
+ void testObjectVariant() {
+ Variant variant =
+ builder.object()
+ .add(
+ "list",
+ builder.array().add(builder.of("hello")).add(builder.of(1)).build())
+ .add(
+ "object",
+ builder.object()
+ .add("ss", builder.of((short) 1))
+ .add("ff", builder.of(10.0f))
+ .build())
+ .add("bb", builder.of((byte) 10))
+ .build();
+
+ assertThat(variant.isArray()).isFalse();
+ assertThat(variant.isPrimitive()).isFalse();
+ assertThat(variant.isObject()).isTrue();
+ assertThat(variant.getType()).isEqualTo(Variant.Type.OBJECT);
+
+ assertThat(variant.getField("list").isArray()).isTrue();
+ assertThat(variant.getField("list").getElement(0).getString()).isEqualTo("hello");
+ assertThat(variant.getField("list").getElement(1).getInt()).isEqualTo(1);
+
+ assertThat(variant.getField("object").isObject()).isTrue();
+ assertThat(variant.getField("object").getField("ss").getShort()).isEqualTo((short) 1);
+ assertThat(variant.getField("object").getField("ff").getFloat()).isEqualTo((10.0f));
+
+ assertThat(variant.getField("bb").getByte()).isEqualTo((byte) 10);
+ assertThat(variant.getField("non_exist")).isNull();
+
+ BinaryVariantBuilder.VariantObjectBuilder objectBuilder = builder.object();
+
+ for (int i = 0; i < 100; i++) {
+ objectBuilder.add(String.valueOf(i), builder.of(i));
+ }
+ variant = objectBuilder.build();
+ for (int i = 0; i < 100; i++) {
+ assertThat(variant.getField(String.valueOf(i)).getInt()).isEqualTo(i);
+ }
+ }
+
+ @Test
+ void testDuplicatedKeyObjectVariant() {
+ assertThatThrownBy(
+ () ->
+ builder.object(false)
+ .add("k", builder.of((byte) 10))
+ .add("k", builder.of("hello"))
+ .build())
+ .isInstanceOf(RuntimeException.class)
+ .hasMessage("VARIANT_DUPLICATE_KEY");
+
+ Variant variant =
+ builder.object(true)
+ .add("k", builder.of((byte) 10))
+ .add("k", builder.of("hello"))
+ .add("k1", builder.of(10))
+ .build();
+
+ assertThat(variant.getField("k").getString()).isEqualTo("hello");
+ assertThat(variant.getField("k1").getInt()).isEqualTo(10);
+ }
+
+ @Test
+ void testToJsonScalar() {
+ Instant instant = Instant.EPOCH;
+ LocalDateTime localDateTime = LocalDateTime.of(2000, 1, 1, 0, 0);
+ LocalDate localDate = LocalDate.of(2000, 1, 1);
+
+ assertThat(builder.of((byte) 1).toJson()).isEqualTo("1");
+ assertThat(builder.of((short) 1).toJson()).isEqualTo("1");
+ assertThat(builder.of(1L).toJson()).isEqualTo("1");
+ assertThat(builder.of(1).toJson()).isEqualTo("1");
+ assertThat(builder.of("hello").toJson()).isEqualTo("\"hello\"");
+ assertThat(builder.of(true).toJson()).isEqualTo("true");
+ assertThat(builder.of(10.0f).toJson()).isEqualTo("10.0");
+ assertThat(builder.of(10.0d).toJson()).isEqualTo("10.0");
+ assertThat(builder.of(BigDecimal.valueOf(100)).toJson()).isEqualTo("100");
+ assertThat(builder.of(instant).toJson()).isEqualTo("\"1970-01-01T00:00:00+00:00\"");
+ assertThat(builder.of(localDateTime).toJson()).isEqualTo("\"2000-01-01T00:00:00\"");
+ assertThat(builder.of(localDate).toJson()).isEqualTo("\"2000-01-01\"");
+ assertThat(builder.of("hello".getBytes()).toJson()).isEqualTo("\"aGVsbG8=\"");
+ assertThat(builder.ofNull().toJson()).isEqualTo("null");
+ }
+
+ @Test
+ void testToJsonNested() {
+ Variant variant =
+ builder.object()
+ .add(
+ "list",
+ builder.array().add(builder.of("hello")).add(builder.of(1)).build())
+ .add(
+ "object",
+ builder.object()
+ .add("ss", builder.of((short) 1))
+ .add("ff", builder.of(10.0f))
+ .build())
+ .build();
+
+ String json = variant.toJson();
+ assertThat(json)
+ .isEqualTo("{" + "\"list\":[\"hello\",1]," + "\"object\":{\"ff\":10.0,\"ss\":1}}");
+ }
+
+ @Test
+ void testVariantException() {
+ assertThatThrownBy(() -> new BinaryVariant(new byte[0], new byte[0]))
+ .isInstanceOf(RuntimeException.class)
+ .hasMessage("MALFORMED_VARIANT");
+
+ byte[] meta = new byte[1];
+ meta[0] = (byte) 0x02;
+ assertThatThrownBy(() -> new BinaryVariant(new byte[1], meta))
+ .isInstanceOf(RuntimeException.class)
+ .hasMessage("MALFORMED_VARIANT");
+
+ byte[] oversize = new byte[0xFFFFFF + 2];
+ meta[0] = (byte) 0x01;
+ oversize[0] = (byte) 0x01;
+ assertThatThrownBy(() -> new BinaryVariant(oversize, meta))
+ .isInstanceOf(RuntimeException.class)
+ .hasMessage("VARIANT_CONSTRUCTOR_SIZE_LIMIT");
+
+ assertThatThrownBy(() -> new BinaryVariant(new byte[1], oversize))
+ .isInstanceOf(RuntimeException.class)
+ .hasMessage("VARIANT_CONSTRUCTOR_SIZE_LIMIT");
+ }
+
+ @Test
+ void testGetThrowException() {
+ Variant variant = builder.of(10f);
+ assertThatThrownBy(variant::getDouble)
+ .isInstanceOf(VariantTypeException.class)
+ .hasMessage("Expected type DOUBLE but got FLOAT");
+ }
+}
diff --git a/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java
new file mode 100644
index 00000000000..791c5fe7ea0
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/java/org/apache/flink/cdc/common/utils/SchemaMergingUtilsTest.java
@@ -0,0 +1,1228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.common.utils;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple4;
+import org.apache.flink.cdc.common.data.DateData;
+import org.apache.flink.cdc.common.data.DecimalData;
+import org.apache.flink.cdc.common.data.LocalZonedTimestampData;
+import org.apache.flink.cdc.common.data.TimeData;
+import org.apache.flink.cdc.common.data.TimestampData;
+import org.apache.flink.cdc.common.data.ZonedTimestampData;
+import org.apache.flink.cdc.common.data.binary.BinaryStringData;
+import org.apache.flink.cdc.common.event.AddColumnEvent;
+import org.apache.flink.cdc.common.event.AlterColumnTypeEvent;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.event.DropColumnEvent;
+import org.apache.flink.cdc.common.event.SchemaChangeEvent;
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.schema.Column;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.DataTypes;
+import org.apache.flink.cdc.common.types.DecimalType;
+import org.apache.flink.cdc.common.types.LocalZonedTimestampType;
+import org.apache.flink.cdc.common.types.TimestampType;
+import org.apache.flink.cdc.common.types.ZonedTimestampType;
+
+import org.apache.flink.shaded.guava33.com.google.common.collect.ImmutableMap;
+
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+import static org.apache.flink.cdc.common.types.DataTypes.DECIMAL;
+import static org.apache.flink.cdc.common.types.DataTypes.VARCHAR;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.coerceObject;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.coerceRow;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.getLeastCommonSchema;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.getLeastCommonType;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.getSchemaDifference;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.isDataTypeCompatible;
+import static org.apache.flink.cdc.common.utils.SchemaMergingUtils.isSchemaCompatible;
+
+/** A test for the {@link SchemaMergingUtils}. */
+class SchemaMergingUtilsTest {
+
+    private static final TableId TABLE_ID = TableId.tableId("foo", "bar", "baz");
+
+    // Character string types
+    private static final DataType CHAR = DataTypes.CHAR(17);
+    private static final DataType VARCHAR = DataTypes.VARCHAR(17);
+    private static final DataType STRING = DataTypes.STRING();
+
+    // Boolean, binary string and numeric types
+    private static final DataType BOOLEAN = DataTypes.BOOLEAN();
+    private static final DataType BINARY = DataTypes.BINARY(17);
+    private static final DataType VARBINARY = DataTypes.VARBINARY(17);
+    private static final DataType SMALLINT = DataTypes.SMALLINT();
+    private static final DataType TINYINT = DataTypes.TINYINT();
+    private static final DataType INT = DataTypes.INT();
+    private static final DataType BIGINT = DataTypes.BIGINT();
+    // The right-hand side calls the statically imported DataTypes.DECIMAL(int, int) factory;
+    // the method and this field share a name but live in different namespaces.
+    private static final DataType DECIMAL =
+            DECIMAL(DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE);
+    private static final DataType FLOAT = DataTypes.FLOAT();
+    private static final DataType DOUBLE = DataTypes.DOUBLE();
+
+    // Date and time types
+    private static final DataType TIMESTAMP_TZ =
+            DataTypes.TIMESTAMP_TZ(ZonedTimestampType.MAX_PRECISION);
+    private static final DataType TIMESTAMP_LTZ =
+            DataTypes.TIMESTAMP_LTZ(LocalZonedTimestampType.MAX_PRECISION);
+    private static final DataType TIMESTAMP = DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION);
+    private static final DataType DATE = DataTypes.DATE();
+    private static final DataType TIME = DataTypes.TIME();
+
+    // Complex types
+    private static final DataType ROW = DataTypes.ROW(INT, STRING);
+    private static final DataType ARRAY = DataTypes.ARRAY(STRING);
+    private static final DataType MAP = DataTypes.MAP(INT, STRING);
+    private static final DataType VARIANT = DataTypes.VARIANT();
+
+    /**
+     * Types that {@code assertTypeMergingVector} merges against an incoming type. The order of
+     * this list defines the column order of every expected vector in
+     * {@code testGetLeastCommonType}. BOOLEAN and DATE are declared above but not listed here.
+     */
+    private static final List<DataType> ALL_TYPES =
+            Arrays.asList(
+                    // Character and binary string types
+                    STRING,
+                    CHAR,
+                    VARCHAR,
+                    BINARY,
+                    VARBINARY,
+                    // Exact numeric types
+                    TINYINT,
+                    SMALLINT,
+                    INT,
+                    BIGINT,
+                    DECIMAL,
+                    // Inexact numeric types
+                    FLOAT,
+                    DOUBLE,
+                    // Date and time types
+                    TIMESTAMP,
+                    TIMESTAMP_LTZ,
+                    TIMESTAMP_TZ,
+                    TIME,
+                    // Complex types
+                    ROW,
+                    ARRAY,
+                    MAP,
+                    VARIANT);
+
+    /** One sample value (numerically 17) per numeric type, used as row input in testCoerceRow. */
+    private static final Map<DataType, Object> DUMMY_OBJECTS =
+            ImmutableMap.of(
+                    TINYINT,
+                    (byte) 17,
+                    SMALLINT,
+                    (short) 17,
+                    INT,
+                    17,
+                    BIGINT,
+                    17L,
+                    DECIMAL,
+                    decOf(17),
+                    FLOAT,
+                    17.0f,
+                    DOUBLE,
+                    17.0);
+
+    /**
+     * Exercises {@code isSchemaCompatible(first, second)} with missing, reordered, added and
+     * removed columns and with wider and narrower column types.
+     *
+     * <p>NOTE(review): judging by the case descriptions, the first argument is the existing schema
+     * and the second the upcoming one - confirm against SchemaMergingUtils.
+     */
+    @Test
+    void testIsSchemaCompatible() {
+        boolean intoEmpty = isSchemaCompatible(null, of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(intoEmpty).as("test merging into an empty schema").isFalse();
+
+        boolean identical =
+                isSchemaCompatible(
+                        of("id", BIGINT, "name", VARCHAR(17)),
+                        of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(identical).as("test identical schema").isTrue();
+
+        boolean reordered =
+                isSchemaCompatible(
+                        of("id", BIGINT, "name", VARCHAR(17)),
+                        of("name", VARCHAR(17), "id", BIGINT));
+        Assertions.assertThat(reordered).as("swapping sequence is ok").isTrue();
+
+        boolean secondHasFewerColumns =
+                isSchemaCompatible(of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT));
+        Assertions.assertThat(secondHasFewerColumns).as("test a wider upcoming schema").isTrue();
+
+        boolean secondHasMoreColumns =
+                isSchemaCompatible(of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(secondHasMoreColumns)
+                .as("test a narrower upcoming schema")
+                .isFalse();
+
+        boolean varcharIntoString =
+                isSchemaCompatible(
+                        of("id", BIGINT, "name", STRING), of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(varcharIntoString)
+                .as("test a wider typed upcoming schema")
+                .isTrue();
+
+        boolean stringIntoVarchar =
+                isSchemaCompatible(
+                        of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT, "name", STRING));
+        Assertions.assertThat(stringIntoVarchar)
+                .as("test a narrower typed upcoming schema")
+                .isFalse();
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", BIGINT),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into BIGINT", type)
+                    .isTrue();
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", BIGINT)))
+                    .as("test fitting BIGINT into %s", type)
+                    .isFalse();
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", STRING),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into STRING", type)
+                    .isTrue();
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", STRING)))
+                    .as("test fitting STRING into %s", type)
+                    .isFalse();
+        }
+
+        for (DataType type : Arrays.asList(FLOAT, DOUBLE, STRING)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", STRING),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into STRING", type)
+                    .isTrue();
+        }
+
+        for (DataType type : Arrays.asList(FLOAT, DOUBLE)) {
+            Assertions.assertThat(
+                            isSchemaCompatible(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", STRING)))
+                    .as("test fitting STRING into %s", type)
+                    .isFalse();
+        }
+
+        boolean differentNames =
+                isSchemaCompatible(of("id", BIGINT, "foo", INT), of("id", BIGINT, "bar", INT));
+        Assertions.assertThat(differentNames).as("columns with different names").isFalse();
+    }
+
+    /**
+     * Checks the schema returned by {@code getLeastCommonSchema(current, upcoming)} for missing,
+     * reordered, added and removed columns and for differing column types.
+     */
+    @Test
+    void testGetLeastCommonSchema() {
+        Schema mergedIntoEmpty = getLeastCommonSchema(null, of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(mergedIntoEmpty)
+                .as("test merging into an empty schema")
+                .isEqualTo(of("id", BIGINT, "name", VARCHAR(17)));
+
+        Schema mergedIdentical =
+                getLeastCommonSchema(
+                        of("id", BIGINT, "name", VARCHAR(17)),
+                        of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(mergedIdentical)
+                .as("test identical schema")
+                .isEqualTo(of("id", BIGINT, "name", VARCHAR(17)));
+
+        Schema mergedReordered =
+                getLeastCommonSchema(
+                        of("id", BIGINT, "name", VARCHAR(17)),
+                        of("name", VARCHAR(17), "id", BIGINT));
+        Assertions.assertThat(mergedReordered)
+                .as("swapping sequence is ok")
+                .isEqualTo(of("id", BIGINT, "name", VARCHAR(17)));
+
+        Schema mergedWithFewerColumns =
+                getLeastCommonSchema(of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT));
+        Assertions.assertThat(mergedWithFewerColumns)
+                .as("test a wider upcoming schema")
+                .isEqualTo(of("id", BIGINT, "name", VARCHAR(17)));
+
+        Schema mergedWithMoreColumns =
+                getLeastCommonSchema(of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(mergedWithMoreColumns)
+                .as("test a narrower upcoming schema")
+                .isEqualTo(of("id", BIGINT, "name", VARCHAR(17)));
+
+        Schema mergedVarcharIntoString =
+                getLeastCommonSchema(
+                        of("id", BIGINT, "name", STRING), of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(mergedVarcharIntoString)
+                .as("test a wider typed upcoming schema")
+                .isEqualTo(of("id", BIGINT, "name", STRING));
+
+        Schema mergedStringIntoVarchar =
+                getLeastCommonSchema(
+                        of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT, "name", STRING));
+        Assertions.assertThat(mergedStringIntoVarchar)
+                .as("test a narrower typed upcoming schema")
+                .isEqualTo(of("id", BIGINT, "name", STRING));
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            getLeastCommonSchema(
+                                    of("id", BIGINT, "number", BIGINT),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into BIGINT", type)
+                    .isEqualTo(of("id", BIGINT, "number", BIGINT));
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            getLeastCommonSchema(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", BIGINT)))
+                    .as("test fitting BIGINT into %s", type)
+                    .isEqualTo(of("id", BIGINT, "number", BIGINT));
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING)) {
+            Assertions.assertThat(
+                            getLeastCommonSchema(
+                                    of("id", BIGINT, "number", STRING),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into STRING", type)
+                    .isEqualTo(of("id", BIGINT, "number", STRING));
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING)) {
+            Assertions.assertThat(
+                            getLeastCommonSchema(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", STRING)))
+                    .as("test fitting STRING into %s", type)
+                    .isEqualTo(of("id", BIGINT, "number", STRING));
+        }
+
+        Schema mergedDifferentNames =
+                getLeastCommonSchema(of("id", BIGINT, "foo", INT), of("id", BIGINT, "bar", INT));
+        Assertions.assertThat(mergedDifferentNames)
+                .as("columns with different names")
+                .isEqualTo(of("id", BIGINT, "foo", INT, "bar", INT));
+
+        Schema mergedMixed =
+                getLeastCommonSchema(
+                        of("id", BIGINT, "foo", INT, "baz", FLOAT),
+                        of("id", BIGINT, "bar", INT, "baz", DOUBLE));
+        Assertions.assertThat(mergedMixed)
+                .as("mixed schema differences")
+                .isEqualTo(of("id", BIGINT, "foo", INT, "baz", DOUBLE, "bar", INT));
+    }
+
+    /**
+     * Checks the schema change events returned by {@code getSchemaDifference} for a missing
+     * source schema, identical and reordered schemas, added and dropped columns, and widened
+     * column types.
+     */
+    @Test
+    void testGetSchemaDifference() {
+        Assertions.assertThat(
+                        getSchemaDifference(TABLE_ID, null, of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test merging into an empty schema")
+                .containsExactly(
+                        new CreateTableEvent(TABLE_ID, of("id", BIGINT, "name", VARCHAR(17))));
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test identical schema")
+                .isEmpty();
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("name", VARCHAR(17), "id", BIGINT)))
+                .as("swapping sequence is ok")
+                .isEmpty();
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID, of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test a widening upcoming schema")
+                .containsExactly(
+                        new AddColumnEvent(
+                                TABLE_ID,
+                                Collections.singletonList(
+                                        new AddColumnEvent.ColumnWithPosition(
+                                                Column.physicalColumn("name", VARCHAR(17)),
+                                                AddColumnEvent.ColumnPosition.AFTER,
+                                                "id"))));
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID, of("id", BIGINT), of("name", VARCHAR(17), "id", BIGINT)))
+                .as("test a widening upcoming schema at first")
+                .containsExactly(
+                        new AddColumnEvent(
+                                TABLE_ID,
+                                Collections.singletonList(
+                                        new AddColumnEvent.ColumnWithPosition(
+                                                Column.physicalColumn("name", VARCHAR(17)),
+                                                AddColumnEvent.ColumnPosition.FIRST,
+                                                null))));
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", STRING)))
+                .as("test a type-widening typed upcoming schema")
+                .containsExactly(
+                        new AlterColumnTypeEvent(
+                                TABLE_ID,
+                                Collections.singletonMap("name", STRING),
+                                Collections.singletonMap("name", VARCHAR(17))));
+
+        // Only columns are dropped here; the expected event lists "number" before "name".
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "name", STRING, "number", BIGINT),
+                                of("id", BIGINT)))
+                .as("test removing name and number")
+                .containsExactly(new DropColumnEvent(TABLE_ID, Arrays.asList("number", "name")));
+
+        // The add event is expected before the drop event.
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "name", STRING, "number", BIGINT),
+                                of("id", BIGINT, "name", STRING, "gentle", STRING)))
+                .as("test removing number while adding gentle")
+                .containsExactly(
+                        new AddColumnEvent(
+                                TABLE_ID,
+                                Collections.singletonList(
+                                        new AddColumnEvent.ColumnWithPosition(
+                                                Column.physicalColumn("gentle", STRING),
+                                                AddColumnEvent.ColumnPosition.AFTER,
+                                                "name"))),
+                        new DropColumnEvent(TABLE_ID, Collections.singletonList("number")));
+
+        Stream.of(TINYINT, SMALLINT, INT)
+                .forEach(
+                        type ->
+                                Assertions.assertThat(
+                                                getSchemaDifference(
+                                                        TABLE_ID,
+                                                        of("id", BIGINT, "number", type),
+                                                        of("id", BIGINT, "number", BIGINT)))
+                                        .as("test escalating %s to BIGINT", type)
+                                        .containsExactly(
+                                                new AlterColumnTypeEvent(
+                                                        TABLE_ID,
+                                                        Collections.singletonMap("number", BIGINT),
+                                                        Collections.singletonMap("number", type))));
+
+        Stream.of(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT)
+                .forEach(
+                        type ->
+                                Assertions.assertThat(
+                                                getSchemaDifference(
+                                                        TABLE_ID,
+                                                        of("id", BIGINT, "number", type),
+                                                        of("id", BIGINT, "number", DOUBLE)))
+                                        .as("test escalating %s to DOUBLE", type)
+                                        .containsExactly(
+                                                new AlterColumnTypeEvent(
+                                                        TABLE_ID,
+                                                        Collections.singletonMap("number", DOUBLE),
+                                                        Collections.singletonMap("number", type))));
+
+        Assertions.assertThat(
+                        getSchemaDifference(
+                                TABLE_ID,
+                                of("id", BIGINT, "foo", INT, "baz", FLOAT),
+                                of("id", BIGINT, "foo", BIGINT, "bar", INT, "baz", DOUBLE)))
+                .as("mixed schema differences")
+                .containsExactly(
+                        new AddColumnEvent(
+                                TABLE_ID,
+                                Collections.singletonList(
+                                        new AddColumnEvent.ColumnWithPosition(
+                                                Column.physicalColumn("bar", INT),
+                                                AddColumnEvent.ColumnPosition.AFTER,
+                                                "foo"))),
+                        new AlterColumnTypeEvent(
+                                TABLE_ID,
+                                ImmutableMap.of("foo", BIGINT, "baz", DOUBLE),
+                                ImmutableMap.of("foo", INT, "baz", FLOAT)));
+    }
+
+    /**
+     * Checks the events returned by the {@code mergeAndDiff(currentSchema, upcomingSchema)}
+     * helper of this test class for missing, reordered, added and removed columns and for
+     * differing column types.
+     */
+    @Test
+    void testMergeAndDiff() {
+        CreateTableEvent createIdAndName =
+                new CreateTableEvent(TABLE_ID, of("id", BIGINT, "name", VARCHAR(17)));
+        Assertions.assertThat(mergeAndDiff(null, of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test merging into an empty schema")
+                .containsExactly(createIdAndName);
+
+        Assertions.assertThat(
+                        mergeAndDiff(
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test identical schema")
+                .isEmpty();
+
+        Assertions.assertThat(
+                        mergeAndDiff(
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("name", VARCHAR(17), "id", BIGINT)))
+                .as("swapping sequence is ok")
+                .isEmpty();
+
+        Assertions.assertThat(mergeAndDiff(of("id", BIGINT, "name", VARCHAR(17)), of("id", BIGINT)))
+                .as("test a wider upcoming schema")
+                .isEmpty();
+
+        AddColumnEvent addNameAfterId =
+                new AddColumnEvent(
+                        TABLE_ID,
+                        Collections.singletonList(
+                                new AddColumnEvent.ColumnWithPosition(
+                                        Column.physicalColumn("name", VARCHAR(17)),
+                                        AddColumnEvent.ColumnPosition.AFTER,
+                                        "id")));
+        Assertions.assertThat(mergeAndDiff(of("id", BIGINT), of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test a narrower upcoming schema")
+                .containsExactly(addNameAfterId);
+
+        Assertions.assertThat(
+                        mergeAndDiff(
+                                of("id", BIGINT, "name", STRING),
+                                of("id", BIGINT, "name", VARCHAR(17))))
+                .as("test a wider typed upcoming schema")
+                .isEmpty();
+
+        AlterColumnTypeEvent nameVarcharToString =
+                new AlterColumnTypeEvent(
+                        TABLE_ID,
+                        Collections.singletonMap("name", STRING),
+                        Collections.singletonMap("name", VARCHAR(17)));
+        Assertions.assertThat(
+                        mergeAndDiff(
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", STRING)))
+                .as("test a narrower typed upcoming schema")
+                .containsExactly(nameVarcharToString);
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            mergeAndDiff(
+                                    of("id", BIGINT, "number", BIGINT),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into BIGINT", type)
+                    .isEmpty();
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            mergeAndDiff(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", BIGINT)))
+                    .as("test fitting BIGINT into %s", type)
+                    .containsExactly(
+                            new AlterColumnTypeEvent(
+                                    TABLE_ID,
+                                    Collections.singletonMap("number", BIGINT),
+                                    Collections.singletonMap("number", type)));
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, STRING)) {
+            Assertions.assertThat(
+                            mergeAndDiff(
+                                    of("id", BIGINT, "number", STRING),
+                                    of("id", BIGINT, "number", type)))
+                    .as("test fitting %s into STRING", type)
+                    .isEmpty();
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL)) {
+            Assertions.assertThat(
+                            mergeAndDiff(
+                                    of("id", BIGINT, "number", type),
+                                    of("id", BIGINT, "number", STRING)))
+                    .as("test fitting STRING into %s", type)
+                    .containsExactly(
+                            new AlterColumnTypeEvent(
+                                    TABLE_ID,
+                                    Collections.singletonMap("number", STRING),
+                                    Collections.singletonMap("number", type)));
+        }
+
+        // The new column "bar" is expected after "baz", the last column of the current schema.
+        AddColumnEvent addBarAfterBaz =
+                new AddColumnEvent(
+                        TABLE_ID,
+                        Collections.singletonList(
+                                new AddColumnEvent.ColumnWithPosition(
+                                        Column.physicalColumn("bar", INT),
+                                        AddColumnEvent.ColumnPosition.AFTER,
+                                        "baz")));
+        AlterColumnTypeEvent bazFloatToDouble =
+                new AlterColumnTypeEvent(
+                        TABLE_ID,
+                        Collections.singletonMap("baz", DOUBLE),
+                        Collections.singletonMap("baz", FLOAT));
+        Assertions.assertThat(
+                        mergeAndDiff(
+                                of("id", BIGINT, "foo", INT, "baz", FLOAT),
+                                of("id", BIGINT, "bar", INT, "baz", DOUBLE)))
+                .as("mixed schema differences")
+                .containsExactly(addBarAfterBaz, bazFloatToDouble);
+    }
+
+    /**
+     * Checks {@code isDataTypeCompatible} on pairs of types.
+     *
+     * <p>Each pair is {@code (f0, f1)}. For a viable pair, {@code isDataTypeCompatible(f1, f0)}
+     * must hold and the swapped call must not. For an infeasible pair, neither call may hold.
+     */
+    @Test
+    void testIsDataTypeCompatible() {
+        List<Tuple2<DataType, DataType>> viableConversions =
+                Arrays.asList(
+                        Tuple2.of(CHAR, STRING),
+                        Tuple2.of(VARCHAR, STRING),
+                        Tuple2.of(BOOLEAN, STRING),
+                        Tuple2.of(BINARY, STRING),
+                        Tuple2.of(DOUBLE, STRING),
+                        Tuple2.of(FLOAT, STRING),
+                        Tuple2.of(DECIMAL, STRING),
+                        Tuple2.of(BIGINT, STRING),
+                        Tuple2.of(INT, STRING),
+                        Tuple2.of(SMALLINT, STRING),
+                        Tuple2.of(TINYINT, STRING),
+                        Tuple2.of(TIMESTAMP_TZ, STRING),
+                        Tuple2.of(TIMESTAMP_LTZ, STRING),
+                        Tuple2.of(TIMESTAMP, STRING),
+                        Tuple2.of(DATE, STRING),
+                        Tuple2.of(TIME, STRING),
+                        Tuple2.of(ROW, STRING),
+                        Tuple2.of(ARRAY, STRING),
+                        Tuple2.of(MAP, STRING),
+                        Tuple2.of(TINYINT, SMALLINT),
+                        Tuple2.of(SMALLINT, INT),
+                        Tuple2.of(INT, BIGINT),
+                        Tuple2.of(BIGINT, DECIMAL),
+                        Tuple2.of(FLOAT, DOUBLE),
+                        Tuple2.of(DATE, TIMESTAMP),
+                        Tuple2.of(TIMESTAMP, TIMESTAMP_LTZ),
+                        Tuple2.of(TIMESTAMP_LTZ, TIMESTAMP_TZ));
+
+        List<Tuple2<DataType, DataType>> infeasibleConversions =
+                Arrays.asList(
+                        Tuple2.of(CHAR, BOOLEAN),
+                        Tuple2.of(BOOLEAN, BINARY),
+                        Tuple2.of(BINARY, DOUBLE),
+                        Tuple2.of(DOUBLE, TIMESTAMP_TZ),
+                        Tuple2.of(TIMESTAMP_TZ, TIME),
+                        Tuple2.of(TIME, ROW),
+                        Tuple2.of(ROW, ARRAY),
+                        Tuple2.of(ARRAY, MAP));
+
+        viableConversions.forEach(
+                conv ->
+                        Assertions.assertThat(isDataTypeCompatible(conv.f1, conv.f0))
+                                .as("test fitting %s into %s", conv.f0, conv.f1)
+                                .isTrue());
+
+        // The reverse direction of a viable conversion must be rejected.
+        viableConversions.forEach(
+                conv ->
+                        Assertions.assertThat(isDataTypeCompatible(conv.f0, conv.f1))
+                                .as("test fitting %s into %s", conv.f1, conv.f0)
+                                .isFalse());
+
+        infeasibleConversions.forEach(
+                conv ->
+                        Assertions.assertThat(isDataTypeCompatible(conv.f1, conv.f0))
+                                .as("test fitting %s into %s", conv.f0, conv.f1)
+                                .isFalse());
+
+        infeasibleConversions.forEach(
+                conv ->
+                        Assertions.assertThat(isDataTypeCompatible(conv.f0, conv.f1))
+                                .as("test fitting %s into %s", conv.f1, conv.f0)
+                                .isFalse());
+    }
+
+    /**
+     * Checks {@code coerceObject} against a table of conversions, using the "UTC" time zone.
+     *
+     * <p>Each {@link Tuple4} is {@code (original type, original value, destination type, expected
+     * value)}; the call under test is {@code coerceObject("UTC", f1, f0, f2)} and its result must
+     * equal {@code f3}.
+     */
+    @Test
+    void testCoerceObject() {
+        Stream<Tuple4<DataType, Object, DataType, Object>> conversionExpects =
+                Stream.of(
+                        // From TINYINT
+                        Tuple4.of(TINYINT, (byte) 0, TINYINT, (byte) 0),
+                        Tuple4.of(TINYINT, (byte) 1, SMALLINT, (short) 1),
+                        Tuple4.of(TINYINT, (byte) 2, INT, 2),
+                        Tuple4.of(TINYINT, (byte) 3, BIGINT, 3L),
+                        Tuple4.of(TINYINT, (byte) 4, DECIMAL, decOf(4)),
+                        Tuple4.of(TINYINT, (byte) 5, FLOAT, 5.0f),
+                        Tuple4.of(TINYINT, (byte) 6, DOUBLE, 6.0),
+                        Tuple4.of(TINYINT, (byte) 7, STRING, binStrOf("7")),
+
+                        // From SMALLINT
+                        Tuple4.of(SMALLINT, (short) 1, SMALLINT, (short) 1),
+                        Tuple4.of(SMALLINT, (short) 2, INT, 2),
+                        Tuple4.of(SMALLINT, (short) 3, BIGINT, 3L),
+                        Tuple4.of(SMALLINT, (short) 4, DECIMAL, decOf(4)),
+                        Tuple4.of(SMALLINT, (short) 5, FLOAT, 5.0f),
+                        Tuple4.of(SMALLINT, (short) 6, DOUBLE, 6.0),
+                        Tuple4.of(SMALLINT, (short) 7, STRING, binStrOf("7")),
+
+                        // From INT
+                        Tuple4.of(INT, 2, INT, 2),
+                        Tuple4.of(INT, 3, BIGINT, 3L),
+                        Tuple4.of(INT, 4, DECIMAL, decOf(4)),
+                        Tuple4.of(INT, 5, FLOAT, 5.0f),
+                        Tuple4.of(INT, 6, DOUBLE, 6.0),
+                        Tuple4.of(INT, 7, STRING, binStrOf("7")),
+
+                        // From BIGINT
+                        Tuple4.of(BIGINT, 3L, BIGINT, 3L),
+                        Tuple4.of(BIGINT, 4L, DECIMAL, decOf(4)),
+                        Tuple4.of(BIGINT, 5L, FLOAT, 5.0f),
+                        Tuple4.of(BIGINT, 6L, DOUBLE, 6.0),
+                        Tuple4.of(BIGINT, 7L, STRING, binStrOf("7")),
+
+                        // From DECIMAL
+                        Tuple4.of(DECIMAL, decOf(4), DECIMAL, decOf(4)),
+                        Tuple4.of(DECIMAL, decOf(5), FLOAT, 5.0f),
+                        Tuple4.of(DECIMAL, decOf(6), DOUBLE, 6.0),
+                        Tuple4.of(DECIMAL, decOf(7), STRING, binStrOf("7")),
+
+                        // From FLOAT
+                        Tuple4.of(FLOAT, 5.0f, FLOAT, 5.0f),
+                        Tuple4.of(FLOAT, 6.0f, DOUBLE, 6.0),
+                        Tuple4.of(FLOAT, 7.0f, STRING, binStrOf("7.0")),
+
+                        // From DOUBLE (inputs are double literals so the value matches its type)
+                        Tuple4.of(DOUBLE, 6.0, DOUBLE, 6.0),
+                        Tuple4.of(DOUBLE, 7.0, STRING, binStrOf("7.0")),
+
+                        // From STRING
+                        Tuple4.of(STRING, binStrOf("AtoZ"), STRING, binStrOf("AtoZ")),
+                        Tuple4.of(STRING, binStrOf("lie"), STRING, binStrOf("lie")),
+
+                        // From CHAR
+                        Tuple4.of(
+                                CHAR, binStrOf("les miserables"), CHAR, binStrOf("les miserables")),
+                        Tuple4.of(CHAR, binStrOf("notre dame"), STRING, binStrOf("notre dame")),
+
+                        // From BINARY (the expected STRING is the Base64 text of the input bytes)
+                        Tuple4.of(BINARY, binOf("les miserables"), BINARY, binOf("les miserables")),
+                        Tuple4.of(
+                                BINARY, binOf("notre dame"), STRING, binStrOf("bm90cmUgZGFtZQ==")),
+
+                        // From BOOLEAN
+                        Tuple4.of(BOOLEAN, true, BOOLEAN, true),
+                        Tuple4.of(BOOLEAN, false, BOOLEAN, false),
+                        Tuple4.of(BOOLEAN, true, STRING, binStrOf("true")),
+                        Tuple4.of(BOOLEAN, false, STRING, binStrOf("false")),
+
+                        // From DATE
+                        Tuple4.of(DATE, dateOf(2017, 1, 1), DATE, dateOf(2017, 1, 1)),
+                        Tuple4.of(DATE, dateOf(2018, 2, 2), TIMESTAMP, tsOf("2018", "02", "02")),
+                        Tuple4.of(
+                                DATE,
+                                dateOf(2019, 3, 3),
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2019", "03", "03")),
+                        Tuple4.of(
+                                DATE, dateOf(2020, 4, 4), TIMESTAMP_TZ, zTsOf("2020", "04", "04")),
+                        Tuple4.of(DATE, dateOf(2021, 5, 5), STRING, binStrOf("2021-05-05")),
+
+                        // From TIME
+                        Tuple4.of(TIME, timeOf(21, 48, 25), TIME, timeOf(21, 48, 25)),
+                        Tuple4.of(TIME, timeOf(21, 48, 25), STRING, binStrOf("21:48:25")),
+
+                        // From TIMESTAMP
+                        Tuple4.of(
+                                TIMESTAMP,
+                                tsOf("2022", "06", "06"),
+                                TIMESTAMP,
+                                tsOf("2022", "06", "06")),
+                        Tuple4.of(
+                                TIMESTAMP,
+                                tsOf("2023", "07", "07"),
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2023", "07", "07")),
+                        Tuple4.of(
+                                TIMESTAMP,
+                                tsOf("2024", "08", "08"),
+                                TIMESTAMP_TZ,
+                                zTsOf("2024", "08", "08")),
+                        Tuple4.of(
+                                TIMESTAMP,
+                                tsOf("2025", "09", "09"),
+                                STRING,
+                                binStrOf("2025-09-09T00:00")),
+
+                        // From TIMESTAMP_LTZ
+                        Tuple4.of(
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2026", "10", "10"),
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2026", "10", "10")),
+                        Tuple4.of(
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2027", "11", "11"),
+                                TIMESTAMP_TZ,
+                                zTsOf("2027", "11", "11")),
+                        Tuple4.of(
+                                TIMESTAMP_LTZ,
+                                ltzTsOf("2028", "12", "12"),
+                                STRING,
+                                binStrOf("2028-12-12T00:00")),
+
+                        // From TIMESTAMP_TZ
+                        Tuple4.of(
+                                TIMESTAMP_TZ,
+                                zTsOf("2018", "01", "01"),
+                                TIMESTAMP_TZ,
+                                zTsOf("2018", "01", "01")),
+                        Tuple4.of(
+                                TIMESTAMP_TZ,
+                                zTsOf("2019", "02", "02"),
+                                STRING,
+                                binStrOf("2019-02-02T00:00:00Z")));
+
+        conversionExpects.forEach(
+                rule ->
+                        Assertions.assertThat(coerceObject("UTC", rule.f1, rule.f0, rule.f2))
+                                .as("Try coercing %s (%s) to %s type", rule.f1, rule.f0, rule.f2)
+                                .isEqualTo(rule.f3));
+    }
+
+    /**
+     * Checks {@code coerceRow} with the "UTC" time zone, first for rows holding values and then
+     * for rows holding {@code null} in the coerced column.
+     *
+     * <p>NOTE(review): the two schema arguments look like (target schema, schema of the given
+     * row), in that order - confirm against SchemaMergingUtils.
+     */
+    @Test
+    void testCoerceRow() {
+        String timezone = "UTC";
+
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                Arrays.asList(2L, binStrOf("Bob"))))
+                .as("test identical schema")
+                .containsExactly(2L, binStrOf("Bob"));
+
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("name", VARCHAR(17), "id", BIGINT),
+                                Arrays.asList(binStrOf("Cecily"), 3L)))
+                .as("swapping sequence is ok")
+                .containsExactly(3L, binStrOf("Cecily"));
+
+        // The column missing from the row is expected to come back as null.
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT),
+                                Collections.singletonList(4L)))
+                .as("test a wider upcoming schema")
+                .containsExactly(4L, null);
+
+        // The input name is a plain java.lang.String; the expected output is binary string data.
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", STRING),
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                Arrays.asList(4L, "Derrida")))
+                .as("test a wider typed upcoming schema")
+                .containsExactly(4L, binStrOf("Derrida"));
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            coerceRow(
+                                    timezone,
+                                    of("id", BIGINT, "number", BIGINT),
+                                    of("id", BIGINT, "number", type),
+                                    Arrays.asList(5L, DUMMY_OBJECTS.get(type))))
+                    .as("test fitting %s into BIGINT", type)
+                    .containsExactly(5L, 17L);
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT)) {
+            Assertions.assertThat(
+                            coerceRow(
+                                    timezone,
+                                    of("id", BIGINT, "number", DOUBLE),
+                                    of("id", BIGINT, "number", type),
+                                    Arrays.asList(6L, DUMMY_OBJECTS.get(type))))
+                    .as("test fitting %s into DOUBLE", type)
+                    .containsExactly(6L, 17.0);
+        }
+
+        // Test coercing with NULL
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                Arrays.asList(2L, null)))
+                .as("test identical schema")
+                .containsExactly(2L, null);
+
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("name", VARCHAR(17), "id", BIGINT),
+                                Arrays.asList(null, 3L)))
+                .as("swapping sequence is ok")
+                .containsExactly(3L, null);
+
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                of("id", BIGINT),
+                                Collections.singletonList(4L)))
+                .as("test a wider upcoming schema")
+                .containsExactly(4L, null);
+
+        Assertions.assertThat(
+                        coerceRow(
+                                timezone,
+                                of("id", BIGINT, "name", STRING),
+                                of("id", BIGINT, "name", VARCHAR(17)),
+                                Arrays.asList(4L, null)))
+                .as("test a wider typed upcoming schema")
+                .containsExactly(4L, null);
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT)) {
+            Assertions.assertThat(
+                            coerceRow(
+                                    timezone,
+                                    of("id", BIGINT, "number", BIGINT),
+                                    of("id", BIGINT, "number", type),
+                                    Arrays.asList(5L, null)))
+                    .as("test fitting %s into BIGINT", type)
+                    .containsExactly(5L, null);
+        }
+
+        for (DataType type : Arrays.asList(TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT)) {
+            Assertions.assertThat(
+                            coerceRow(
+                                    timezone,
+                                    of("id", BIGINT, "number", DOUBLE),
+                                    of("id", BIGINT, "number", type),
+                                    Arrays.asList(6L, null)))
+                    .as("test fitting %s into DOUBLE", type)
+                    .containsExactly(6L, null);
+        }
+    }
+
+    /**
+     * Checks {@code getLeastCommonType} for every incoming type against every entry of
+     * {@code ALL_TYPES}.
+     *
+     * <p>Each table entry is {@code (incoming type, expected vector)}. The vector follows the
+     * order of {@code ALL_TYPES} and is laid out on four lines:
+     *
+     * <pre>
+     * STRING, CHAR, VARCHAR, BINARY, VARBINARY,
+     * TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE,
+     * TIMESTAMP, TIMESTAMP_LTZ, TIMESTAMP_TZ, TIME,
+     * ROW, ARRAY, MAP, VARIANT
+     * </pre>
+     */
+    @Test
+    void testGetLeastCommonType() {
+        List<Tuple2<DataType, List<DataType>>> expectedByIncomingType =
+                Arrays.asList(
+                        Tuple2.of(
+                                STRING,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                CHAR,
+                                Arrays.asList(
+                                        STRING, CHAR, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                VARCHAR,
+                                Arrays.asList(
+                                        STRING, STRING, VARCHAR, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                BINARY,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, BINARY, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                VARBINARY,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, VARBINARY,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        // 8-bit TINYINT could fit into FLOAT (24 sig bits) or DOUBLE (53 sig bits)
+                        Tuple2.of(
+                                TINYINT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        TINYINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        // 16-bit SMALLINT could fit into FLOAT (24 sig bits) or DOUBLE (53 sig
+                        // bits)
+                        Tuple2.of(
+                                SMALLINT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        SMALLINT, SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        // 32-bit INT could fit into DOUBLE (53 sig bits)
+                        Tuple2.of(
+                                INT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        INT, INT, INT, BIGINT, DECIMAL, DOUBLE, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                BIGINT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        BIGINT, BIGINT, BIGINT, BIGINT, DECIMAL, DOUBLE, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                DECIMAL,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                FLOAT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        FLOAT, FLOAT, DOUBLE, DOUBLE, STRING, FLOAT, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                DOUBLE,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        DOUBLE, DOUBLE, DOUBLE, DOUBLE, STRING, DOUBLE, DOUBLE,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                TIMESTAMP,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        TIMESTAMP, TIMESTAMP_LTZ, TIMESTAMP_TZ, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                TIMESTAMP_LTZ,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        TIMESTAMP_LTZ, TIMESTAMP_LTZ, TIMESTAMP_TZ, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                TIMESTAMP_TZ,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        TIMESTAMP_TZ, TIMESTAMP_TZ, TIMESTAMP_TZ, STRING,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                TIME,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, TIME,
+                                        STRING, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                ROW,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        ROW, STRING, STRING, STRING)),
+                        Tuple2.of(
+                                ARRAY,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, ARRAY, STRING, STRING)),
+                        Tuple2.of(
+                                MAP,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, MAP, STRING)),
+                        Tuple2.of(
+                                VARIANT,
+                                Arrays.asList(
+                                        STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, STRING,
+                                        STRING, STRING, STRING, VARIANT)));
+
+        // Rows are checked in table order; the first failing row aborts the test.
+        for (Tuple2<DataType, List<DataType>> row : expectedByIncomingType) {
+            assertTypeMergingVector(row.f0, row.f1);
+        }
+    }
+
+ /**
+  * Asserts that merging {@code incomingType} with every entry of {@code ALL_TYPES} through
+  * {@code getLeastCommonType} yields exactly {@code resultTypes}, in order, and that swapping the
+  * two operands yields the same results.
+  *
+  * @param incomingType the type merged against each entry of {@code ALL_TYPES}
+  * @param resultTypes the expected merge results, listed in {@code ALL_TYPES} order
+  */
+ private static void assertTypeMergingVector(DataType incomingType, List resultTypes) {
+     Assertions.assertThat(ALL_TYPES)
+             .map(type -> getLeastCommonType(type, incomingType))
+             .containsExactlyElementsOf(resultTypes);
+
+     // Flip LHS and RHS should emit same outputs. This has to be a separate assertion that starts
+     // from ALL_TYPES again: map() replaces the object under test with the mapped list, so
+     // chaining a second map() onto the first assertion would feed the already-merged types
+     // (not the original ones) into the flipped call.
+     Assertions.assertThat(ALL_TYPES)
+             .map(type -> getLeastCommonType(incomingType, type))
+             .containsExactlyElementsOf(resultTypes);
+ }
+
+ // Some testing utility methods.
+
+ /**
+  * Computes the least common schema of the two arguments with {@code getLeastCommonSchema} and
+  * returns what {@code getSchemaDifference} reports between {@code currentSchema} and that merged
+  * schema for {@code TABLE_ID}.
+  *
+  * @param currentSchema the schema before merging, may be {@code null}
+  * @param upcomingSchema the schema to merge with {@code currentSchema}
+  * @return the result of {@code getSchemaDifference}
+  */
+ private static List mergeAndDiff(
+         @Nullable Schema currentSchema, Schema upcomingSchema) {
+     return getSchemaDifference(
+             TABLE_ID, currentSchema, getLeastCommonSchema(currentSchema, upcomingSchema));
+ }
+
+ /**
+  * Builds a {@link Schema} of physical columns from alternating name/type arguments, e.g. {@code
+  * of("id", idType, "name", nameType)}.
+  *
+  * <p>An odd number of arguments fails the {@code Preconditions.checkState} check.
+  *
+  * @param args column names ({@link String}) at even positions, each followed by its {@link
+  *     DataType}
+  * @return the schema produced by the builder
+  */
+ private static Schema of(Object... args) {
+     Preconditions.checkState(args.length % 2 == 0);
+     Schema.Builder schemaBuilder = Schema.newBuilder();
+     // Walk the arguments pairwise: [i] is the column name, [i + 1] its type.
+     for (int i = 0; i < args.length; i += 2) {
+         schemaBuilder.physicalColumn((String) args[i], (DataType) args[i + 1]);
+     }
+     return schemaBuilder.build();
+ }
+
+ /** Creates a {@link DateData} from the given year, month and day of month. */
+ private static DateData dateOf(int year, int month, int dayOfMonth) {
+     LocalDate date = LocalDate.of(year, month, dayOfMonth);
+     return DateData.fromLocalDate(date);
+ }
+
+ /** Creates a {@link TimeData} from the given hour, minute and second. */
+ private static TimeData timeOf(int hour, int minute, int second) {
+     LocalTime time = LocalTime.of(hour, minute, second);
+     return TimeData.fromLocalTime(time);
+ }
+
+ /**
+  * Creates a {@link TimestampData} at 00:00:00 of the given date. The three parts are substituted
+  * as-is into a {@code <year>-<month>-<day> 00:00:00} string, which is then parsed by {@link
+  * Timestamp#valueOf(String)}.
+  */
+ private static TimestampData tsOf(String year, String month, String dayOfMonth) {
+     String midnight = String.format("%s-%s-%s 00:00:00", year, month, dayOfMonth);
+     return TimestampData.fromTimestamp(Timestamp.valueOf(midnight));
+ }
+
+ /**
+  * Creates a {@link LocalZonedTimestampData} for 00:00:00 UTC of the given date. The three parts
+  * are substituted as-is into a {@code <year>-<month>-<day>T00:00:00Z} string, so together they
+  * must form text that {@link Instant#parse} accepts.
+  */
+ private static LocalZonedTimestampData ltzTsOf(String year, String month, String dayOfMonth) {
+     String isoInstant = String.format("%s-%s-%sT00:00:00Z", year, month, dayOfMonth);
+     long epochMillis = Instant.parse(isoInstant).toEpochMilli();
+     return LocalZonedTimestampData.fromEpochMillis(epochMillis);
+ }
+
+ /**
+  * Creates a {@link ZonedTimestampData} for 00:00:00 of the given date in the {@code "UTC"} zone.
+  * The three parts are substituted as-is into a {@code <year>-<month>-<day>T00:00:00Z} string, so
+  * together they must form text that {@link Instant#parse} accepts.
+  */
+ private static ZonedTimestampData zTsOf(String year, String month, String dayOfMonth) {
+     Instant midnightUtc =
+             Instant.parse(String.format("%s-%s-%sT00:00:00Z", year, month, dayOfMonth));
+     ZonedDateTime zoned = ZonedDateTime.ofInstant(midnightUtc, ZoneId.of("UTC"));
+     return ZonedTimestampData.fromZonedDateTime(zoned);
+ }
+
+ /**
+  * Creates a {@link DecimalData} from {@code value}, passing {@code DecimalType.MAX_PRECISION} and
+  * {@code DecimalType.DEFAULT_SCALE} as precision and scale.
+  */
+ private static DecimalData decOf(long value) {
+     BigDecimal decimal = BigDecimal.valueOf(value);
+     return DecimalData.fromBigDecimal(
+             decimal, DecimalType.MAX_PRECISION, DecimalType.DEFAULT_SCALE);
+ }
+
+ /** Shorthand for {@code BinaryStringData.fromString(str)}. */
+ private static BinaryStringData binStrOf(String str) {
+     BinaryStringData binaryString = BinaryStringData.fromString(str);
+     return binaryString;
+ }
+
+ /**
+  * Encodes {@code str} as UTF-8 bytes.
+  *
+  * <p>The charset is passed explicitly because the no-argument {@link String#getBytes()} encodes
+  * with the platform default charset, which would make the bytes for non-ASCII text depend on the
+  * machine running the test.
+  *
+  * @param str the text to encode
+  * @return the UTF-8 encoding of {@code str}
+  */
+ private static byte[] binOf(String str) {
+     return str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+ }
+}
diff --git a/flink-cdc-common-2.x/src/test/resources/log4j2-test.properties b/flink-cdc-common-2.x/src/test/resources/log4j2-test.properties
new file mode 100644
index 00000000000..0d45bab8011
--- /dev/null
+++ b/flink-cdc-common-2.x/src/test/resources/log4j2-test.properties
@@ -0,0 +1,25 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# Set root logger level to ERROR to not flood build logs
+# set manually to INFO for debugging purposes
+rootLogger.level = ERROR
+rootLogger.appenderRef.test.ref = TestLogger
+
+appender.testlogger.name = TestLogger
+appender.testlogger.type = CONSOLE
+appender.testlogger.target = SYSTEM_ERR
+appender.testlogger.layout.type = PatternLayout
+appender.testlogger.layout.pattern = %-4r [%t] %-5p %c - %m%n
diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/types/VariantType.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/types/VariantType.java
index d07907c44b9..70ab05a7d72 100644
--- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/types/VariantType.java
+++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/types/VariantType.java
@@ -17,7 +17,7 @@
package org.apache.flink.cdc.common.types;
-import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
import java.util.Collections;
import java.util.List;
diff --git a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/ThreadLocalCache.java b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/ThreadLocalCache.java
index f4b8101ed60..03aef691671 100644
--- a/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/ThreadLocalCache.java
+++ b/flink-cdc-common/src/main/java/org/apache/flink/cdc/common/utils/ThreadLocalCache.java
@@ -17,7 +17,7 @@
package org.apache.flink.cdc.common.utils;
-import org.apache.flink.annotation.Internal;
+import org.apache.flink.cdc.common.annotation.Internal;
import java.util.LinkedHashMap;
import java.util.Map;
diff --git a/pom.xml b/pom.xml
index 61d657a91ce..b740dd7cc89 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,6 +35,7 @@ limitations under the License.
flink-cdc-cli
flink-cdc-common
+ flink-cdc-common-2.x
flink-cdc-composer
flink-cdc-dist
flink-cdc-connect
@@ -67,8 +68,13 @@ limitations under the License.
true
- 1.20.3
+ 1.20.3
+ 2.2.0
+ ${flink.1.x.version}
1.20
+ 31.1-jre-17.0
+ 33.4.0-jre-20.0
+ ${flink.1.x.shaded.guava.version}
17.0
1.9.8.Final
3.2.0
@@ -289,7 +295,7 @@ limitations under the License.
org.apache.flink
flink-shaded-guava
- 31.1-jre-${flink.shaded.version}
+ ${shaded.guava.version}