Commit bc6be75

[flink] Fluss sink supports dynamic shuffle.
1 parent 49ce39d commit bc6be75

34 files changed: +3334 −53 lines
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.adapter;
+
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
+
+/**
+ * A small util class that tries to hide calls to Flink Internal or PublicEvolving interfaces,
+ * as Flink can change those APIs during minor version releases.
+ */
+public class FlinkCompatibilityUtil {
+
+    private FlinkCompatibilityUtil() {}
+
+    /** Get the index of this subtask. TODO: remove this method when Flink 1.18 is no longer supported. */
+    public static int getIndexOfThisSubtask(StreamingRuntimeContext runtimeContext) {
+        return runtimeContext.getIndexOfThisSubtask();
+    }
+}
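
The adapter's value is at the call sites: writer code asks FlinkCompatibilityUtil for the subtask index instead of touching the runtime context directly, so only this class changes when Flink moves the accessor (as the Flink 2.x variant of this file further below does via getTaskInfo()). A minimal sketch of such a call site; the writer class is hypothetical and not part of this commit:

import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;

// Hypothetical call site: the version-sensitive accessor is funneled through
// the adapter so this class compiles unchanged against Flink 1.18 and 2.x.
class SubtaskAwareWriter {
    private final int subtaskIndex;

    SubtaskAwareWriter(StreamingRuntimeContext runtimeContext) {
        this.subtaskIndex = FlinkCompatibilityUtil.getIndexOfThisSubtask(runtimeContext);
    }
}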

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/FlinkConnectorOptions.java

Lines changed: 18 additions & 0 deletions
@@ -18,6 +18,7 @@
 package org.apache.fluss.flink;
 
 import org.apache.fluss.config.FlussConfigUtils;
+import org.apache.fluss.flink.sink.shuffle.DistributionMode;
 import org.apache.fluss.flink.utils.FlinkConversions;
 
 import org.apache.flink.configuration.ConfigOption;
@@ -125,6 +126,23 @@ public class FlinkConnectorOptions {
                                     + BUCKET_KEY.key()
                                     + "' is defined. For Primary Key table, it is enabled by default.");
 
+    public static final ConfigOption<DistributionMode> SINK_DISTRIBUTION_MODE =
+            ConfigOptions.key("sink.distribution-mode")
+                    .enumType(DistributionMode.class)
+                    .defaultValue(DistributionMode.BUCKET_SHUFFLE)
+                    .withDescription(
+                            "Defines the distribution mode for writing data to the sink. Available options are:\n"
+                                    + "- NONE: No specific distribution strategy. Data is forwarded as is.\n"
+                                    + "- BUCKET_SHUFFLE: Shuffle data by bucket ID before writing to the sink. "
+                                    + "Routing records with the same bucket ID to the same task "
+                                    + "can improve the efficiency of client processing and reduce resource consumption. "
+                                    + "For Log Table, bucket shuffle only takes effect when the '"
+                                    + BUCKET_KEY.key()
+                                    + "' is defined. For Primary Key table, it is enabled by default.\n"
+                                    + "- DYNAMIC_SHUFFLE: Dynamically adjusts the shuffle strategy based on partition key traffic patterns. "
+                                    + "This mode monitors data distribution and adjusts the shuffle behavior to balance the load. "
+                                    + "It is only supported for partitioned tables.");
+
     // --------------------------------------------------------------------------------------------
     // table storage specific options
     // --------------------------------------------------------------------------------------------
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.adapter;
+
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
+
+/**
+ * A small util class that tries to hide calls to Flink Internal or PublicEvolving interfaces,
+ * as Flink can change those APIs during minor version releases.
+ */
+public class FlinkCompatibilityUtil {
+
+    private FlinkCompatibilityUtil() {}
+
+    /** Get the index of this subtask. TODO: remove this method when Flink 1.18 is no longer supported. */
+    public static int getIndexOfThisSubtask(StreamingRuntimeContext runtimeContext) {
+        return runtimeContext.getTaskInfo().getIndexOfThisSubtask();
+    }
+}

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/catalog/FlinkTableFactory.java

Lines changed: 12 additions & 1 deletion
@@ -23,6 +23,7 @@
 import org.apache.fluss.flink.lake.LakeFlinkCatalog;
 import org.apache.fluss.flink.lake.LakeTableFactory;
 import org.apache.fluss.flink.sink.FlinkTableSink;
+import org.apache.fluss.flink.sink.shuffle.DistributionMode;
 import org.apache.fluss.flink.source.FlinkTableSource;
 import org.apache.fluss.flink.utils.FlinkConnectorOptionsUtils;
 import org.apache.fluss.metadata.DataLakeFormat;
@@ -175,6 +176,15 @@ public DynamicTableSink createDynamicTableSink(Context context) {
         List<String> partitionKeys = resolvedCatalogTable.getPartitionKeys();
 
         RowType rowType = (RowType) context.getPhysicalRowDataType().getLogicalType();
+        DistributionMode distributionMode;
+        if (tableOptions.getOptional(FlinkConnectorOptions.SINK_DISTRIBUTION_MODE).isPresent()) {
+            distributionMode = tableOptions.get(FlinkConnectorOptions.SINK_DISTRIBUTION_MODE);
+        } else {
+            distributionMode =
+                    tableOptions.get(FlinkConnectorOptions.SINK_BUCKET_SHUFFLE)
+                            ? DistributionMode.BUCKET_SHUFFLE
+                            : DistributionMode.NONE;
+        }
 
         return new FlinkTableSink(
                 toFlussTablePath(context.getObjectIdentifier()),
@@ -190,7 +200,7 @@ public DynamicTableSink createDynamicTableSink(Context context) {
                 tableOptions.get(toFlinkOption(TABLE_DELETE_BEHAVIOR)),
                 tableOptions.get(FlinkConnectorOptions.BUCKET_NUMBER),
                 getBucketKeys(tableOptions),
-                tableOptions.get(FlinkConnectorOptions.SINK_BUCKET_SHUFFLE));
+                distributionMode);
     }
 
     @Override
@@ -216,6 +226,7 @@ public Set<ConfigOption<?>> optionalOptions() {
                 FlinkConnectorOptions.LOOKUP_ASYNC,
                 FlinkConnectorOptions.SINK_IGNORE_DELETE,
                 FlinkConnectorOptions.SINK_BUCKET_SHUFFLE,
+                FlinkConnectorOptions.SINK_DISTRIBUTION_MODE,
                 LookupOptions.MAX_RETRIES,
                 LookupOptions.CACHE_TYPE,
                 LookupOptions.PARTIAL_CACHE_EXPIRE_AFTER_ACCESS,
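
The factory keeps the legacy boolean option working: an explicit 'sink.distribution-mode' wins, otherwise 'sink.bucket-shuffle' is mapped onto the new enum. A self-contained sketch of that precedence rule; the helper class and method names are hypothetical, only the mapping mirrors the factory code above:

import java.util.Optional;

final class DistributionModeResolution {

    enum DistributionMode { NONE, BUCKET_SHUFFLE, DYNAMIC_SHUFFLE }

    // Mirrors the factory logic: the new enum option takes precedence; the
    // legacy boolean maps to BUCKET_SHUFFLE (true) or NONE (false).
    static DistributionMode resolve(
            Optional<DistributionMode> explicitMode, boolean legacyBucketShuffle) {
        return explicitMode.orElse(
                legacyBucketShuffle ? DistributionMode.BUCKET_SHUFFLE : DistributionMode.NONE);
    }

    public static void main(String[] args) {
        System.out.println(resolve(Optional.empty(), true)); // BUCKET_SHUFFLE
        System.out.println(resolve(Optional.of(DistributionMode.NONE), true)); // NONE
    }
}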

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkSink.java

Lines changed: 98 additions & 25 deletions
@@ -20,19 +20,27 @@
 import org.apache.fluss.annotation.Internal;
 import org.apache.fluss.config.Configuration;
 import org.apache.fluss.flink.sink.serializer.FlussSerializationSchema;
+import org.apache.fluss.flink.sink.shuffle.DataStatisticsOperatorFactory;
+import org.apache.fluss.flink.sink.shuffle.DistributionMode;
+import org.apache.fluss.flink.sink.shuffle.StatisticsOrRecord;
+import org.apache.fluss.flink.sink.shuffle.StatisticsOrRecordChannelComputer;
+import org.apache.fluss.flink.sink.shuffle.StatisticsOrRecordTypeInformation;
 import org.apache.fluss.flink.sink.writer.AppendSinkWriter;
 import org.apache.fluss.flink.sink.writer.FlinkSinkWriter;
 import org.apache.fluss.flink.sink.writer.UpsertSinkWriter;
 import org.apache.fluss.metadata.DataLakeFormat;
 import org.apache.fluss.metadata.TablePath;
 
+import org.apache.flink.api.common.functions.FlatMapFunction;
 import org.apache.flink.api.common.operators.MailboxExecutor;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.connector.sink2.Sink;
 import org.apache.flink.api.connector.sink2.SinkWriter;
 import org.apache.flink.api.connector.sink2.WriterInitContext;
 import org.apache.flink.runtime.metrics.groups.InternalSinkWriterMetricGroup;
 import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
 import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
 import org.apache.flink.table.types.logical.RowType;
 
 import javax.annotation.Nullable;
@@ -97,8 +105,9 @@ static class AppendSinkWriterBuilder<InputT>
         private final List<String> bucketKeys;
         private final List<String> partitionKeys;
         private final @Nullable DataLakeFormat lakeFormat;
-        private final boolean shuffleByBucketId;
+        private final DistributionMode shuffleMode;
         private final FlussSerializationSchema<InputT> flussSerializationSchema;
+        private final @Nullable TypeInformation<InputT> rowTypeInformation;
 
         public AppendSinkWriterBuilder(
                 TablePath tablePath,
@@ -108,17 +117,19 @@ public AppendSinkWriterBuilder(
                 List<String> bucketKeys,
                 List<String> partitionKeys,
                 @Nullable DataLakeFormat lakeFormat,
-                boolean shuffleByBucketId,
-                FlussSerializationSchema<InputT> flussSerializationSchema) {
+                DistributionMode shuffleMode,
+                FlussSerializationSchema<InputT> flussSerializationSchema,
+                @Nullable TypeInformation<InputT> rowTypeInformation) {
             this.tablePath = tablePath;
             this.flussConfig = flussConfig;
             this.tableRowType = tableRowType;
             this.numBucket = numBucket;
             this.bucketKeys = bucketKeys;
             this.partitionKeys = partitionKeys;
             this.lakeFormat = lakeFormat;
-            this.shuffleByBucketId = shuffleByBucketId;
+            this.shuffleMode = shuffleMode;
             this.flussSerializationSchema = flussSerializationSchema;
+            this.rowTypeInformation = rowTypeInformation;
         }
 
         @Override
@@ -133,20 +144,76 @@ public AppendSinkWriter<InputT> createWriter(MailboxExecutor mailboxExecutor) {
 
         @Override
         public DataStream<InputT> addPreWriteTopology(DataStream<InputT> input) {
-            // For append only sink, we will do bucket shuffle only if bucket keys are not empty.
-            if (!bucketKeys.isEmpty() && shuffleByBucketId) {
-                return partition(
-                        input,
-                        new FlinkRowDataChannelComputer<>(
-                                toFlussRowType(tableRowType),
-                                bucketKeys,
-                                partitionKeys,
-                                lakeFormat,
-                                numBucket,
-                                flussSerializationSchema),
-                        input.getParallelism());
-            } else {
-                return input;
+            switch (shuffleMode) {
+                case BUCKET_SHUFFLE:
+                    if (!bucketKeys.isEmpty()) {
+                        return partition(
+                                input,
+                                new FlinkRowDataChannelComputer<>(
+                                        toFlussRowType(tableRowType),
+                                        bucketKeys,
+                                        partitionKeys,
+                                        lakeFormat,
+                                        numBucket,
+                                        flussSerializationSchema),
+                                input.getParallelism());
+                    }
+                    return input;
+                case NONE:
+                    return input;
+                case DYNAMIC_SHUFFLE:
+                    if (partitionKeys.isEmpty()) {
+                        throw new UnsupportedOperationException(
+                                "DYNAMIC_SHUFFLE is only supported for partitioned tables");
+                    }
+
+                    if (rowTypeInformation == null) {
+                        throw new UnsupportedOperationException(
+                                "RowTypeInformation is required for DYNAMIC_SHUFFLE mode.");
+                    }
+                    TypeInformation<StatisticsOrRecord<InputT>> statisticsOrRecordTypeInformation =
+                            new StatisticsOrRecordTypeInformation<>(rowTypeInformation);
+                    SingleOutputStreamOperator<StatisticsOrRecord<InputT>> shuffleStream =
+                            input.transform(
+                                            "Range shuffle Collector",
+                                            statisticsOrRecordTypeInformation,
+                                            new DataStatisticsOperatorFactory<>(
+                                                    toFlussRowType(tableRowType),
+                                                    partitionKeys,
+                                                    flussSerializationSchema))
+                                    // Set the same parallelism as the input operator to
+                                    // encourage chaining.
+                                    .uid("Range shuffle Collector" + tablePath)
+                                    .setParallelism(input.getParallelism());
+
+                    return partition(
+                                    shuffleStream,
+                                    new StatisticsOrRecordChannelComputer<>(
+                                            toFlussRowType(tableRowType),
+                                            bucketKeys,
+                                            partitionKeys,
+                                            numBucket,
+                                            lakeFormat,
+                                            flussSerializationSchema),
+                                    input.getParallelism())
+                            .flatMap(
+                                    (FlatMapFunction<StatisticsOrRecord<InputT>, InputT>)
+                                            (statisticsOrRecord, out) -> {
+                                                if (statisticsOrRecord.hasRecord()) {
+                                                    out.collect(statisticsOrRecord.record());
+                                                }
+                                            })
+                            .uid("flat map" + tablePath)
+                            // To promote operator chaining with the downstream writer operator,
+                            // set the slot sharing group and leave the parallelism as default;
+                            // {@link SinkTransformationTranslator} will set the parallelism same
+                            // as the sink transformation.
+                            .slotSharingGroup("shuffle-partition-custom-group")
+                            .returns(rowTypeInformation);
+
+                default:
+                    throw new UnsupportedOperationException(
+                            "Unsupported distribution mode: " + shuffleMode);
             }
         }
     }
@@ -165,7 +232,7 @@ static class UpsertSinkWriterBuilder<InputT>
         private final List<String> bucketKeys;
         private final List<String> partitionKeys;
         private final @Nullable DataLakeFormat lakeFormat;
-        private final boolean shuffleByBucketId;
+        private final DistributionMode shuffleMode;
         private final FlussSerializationSchema<InputT> flussSerializationSchema;
 
         UpsertSinkWriterBuilder(
@@ -177,7 +244,7 @@ static class UpsertSinkWriterBuilder<InputT>
                 List<String> bucketKeys,
                 List<String> partitionKeys,
                 @Nullable DataLakeFormat lakeFormat,
-                boolean shuffleByBucketId,
+                DistributionMode shuffleMode,
                 FlussSerializationSchema<InputT> flussSerializationSchema) {
             this.tablePath = tablePath;
             this.flussConfig = flussConfig;
@@ -187,7 +254,7 @@ static class UpsertSinkWriterBuilder<InputT>
             this.bucketKeys = bucketKeys;
             this.partitionKeys = partitionKeys;
             this.lakeFormat = lakeFormat;
-            this.shuffleByBucketId = shuffleByBucketId;
+            this.shuffleMode = shuffleMode;
             this.flussSerializationSchema = flussSerializationSchema;
         }
 
@@ -204,8 +271,9 @@ public UpsertSinkWriter<InputT> createWriter(MailboxExecutor mailboxExecutor) {
 
         @Override
         public DataStream<InputT> addPreWriteTopology(DataStream<InputT> input) {
-            return shuffleByBucketId
-                    ? partition(
+            switch (shuffleMode) {
+                case BUCKET_SHUFFLE:
+                    return partition(
                             input,
                             new FlinkRowDataChannelComputer<>(
                                     toFlussRowType(tableRowType),
@@ -214,8 +282,13 @@ public DataStream<InputT> addPreWriteTopology(DataStream<InputT> input) {
                                     lakeFormat,
                                     numBucket,
                                     flussSerializationSchema),
-                            input.getParallelism())
-                    : input;
+                            input.getParallelism());
+                case NONE:
+                    return input;
+                default:
+                    throw new UnsupportedOperationException(
+                            "Unsupported distribution mode: " + shuffleMode);
+            }
         }
     }
 }
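
The DYNAMIC_SHUFFLE topology threads a union type, StatisticsOrRecord, from the statistics collector through the custom partitioner, then unwraps it in the flatMap before the writer. The class itself is not part of this excerpt; a minimal, hypothetical sketch of the shape the flatMap relies on (hasRecord()/record()), with all field and factory names assumed:

// Hypothetical sketch, not the class from this commit: a tagged union carrying
// either a statistics payload (for adjusting the shuffle) or a data record.
final class StatisticsOrRecord<T> {
    private final Object statistics; // placeholder for the statistics payload
    private final T record;

    private StatisticsOrRecord(Object statistics, T record) {
        this.statistics = statistics;
        this.record = record;
    }

    static <T> StatisticsOrRecord<T> fromRecord(T record) {
        return new StatisticsOrRecord<>(null, record);
    }

    static <T> StatisticsOrRecord<T> fromStatistics(Object statistics) {
        return new StatisticsOrRecord<>(statistics, null);
    }

    boolean hasRecord() {
        return record != null;
    }

    T record() {
        return record;
    }
}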
