diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md
index 62134b14624..2fed38b0e74 100644
--- a/docs/additional-functionality/advanced_configs.md
+++ b/docs/additional-functionality/advanced_configs.md
@@ -435,8 +435,11 @@ Name | SQL Function(s) | Description | Default Value | Notes
 <a name="sql.expression.StaticInvoke"></a>spark.rapids.sql.expression.StaticInvoke| |StaticInvoke|true|The supported types are not deterministic since it's a dynamic expression|
 <a name="sql.expression.NormalizeNaNAndZero"></a>spark.rapids.sql.expression.NormalizeNaNAndZero| |Normalize NaN and zero|true|None|
 <a name="sql.expression.ScalarSubquery"></a>spark.rapids.sql.expression.ScalarSubquery| |Subquery that will return only one row and one column|true|None|
+<a name="sql.expression.ScalaAggregator"></a>spark.rapids.sql.expression.ScalaAggregator| |User Defined Aggregator, it can choose to implement a RAPIDS accelerated interface to get better performance.|true|None|
+<a name="sql.expression.ScalaUDAF"></a>spark.rapids.sql.expression.ScalaUDAF| |User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance.|true|None|
 <a name="sql.expression.HiveGenericUDF"></a>spark.rapids.sql.expression.HiveGenericUDF| |Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance|true|None|
 <a name="sql.expression.HiveSimpleUDF"></a>spark.rapids.sql.expression.HiveSimpleUDF| |Hive UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance|true|None|
+<a name="sql.expression.HiveUDAFFunction"></a>spark.rapids.sql.expression.HiveUDAFFunction| |Hive user defined aggregate function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance|true|None|
 
 ### Execution
 
diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index e5878efe5d9..6fd59e312b2 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -22886,6 +22886,202 @@ are limited.
 <td><b>NS</b></td>
 </tr>
 <tr>
+<td rowSpan="4">ScalaAggregator</td>
+<td rowSpan="4"> </td>
+<td rowSpan="4">User Defined Aggregator, it can choose to implement a RAPIDS accelerated interface to get better performance.</td>
+<td rowSpan="4">None</td>
+<td rowSpan="2">aggregation</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td rowSpan="2">reduction</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td rowSpan="4">ScalaUDAF</td>
+<td rowSpan="4"> </td>
+<td rowSpan="4">User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance.</td>
+<td rowSpan="4">None</td>
+<td rowSpan="2">aggregation</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td rowSpan="2">reduction</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
 <td rowSpan="2">HiveGenericUDF</td>
 <td rowSpan="2"> </td>
 <td rowSpan="2">Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance</td>
@@ -22987,6 +23183,132 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 </tr>
+<tr>
+<th>Expression</th>
+<th>SQL Functions(s)</th>
+<th>Description</th>
+<th>Notes</th>
+<th>Context</th>
+<th>Param/Output</th>
+<th>BOOLEAN</th>
+<th>BYTE</th>
+<th>SHORT</th>
+<th>INT</th>
+<th>LONG</th>
+<th>FLOAT</th>
+<th>DOUBLE</th>
+<th>DATE</th>
+<th>TIMESTAMP</th>
+<th>STRING</th>
+<th>DECIMAL</th>
+<th>NULL</th>
+<th>BINARY</th>
+<th>CALENDAR</th>
+<th>ARRAY</th>
+<th>MAP</th>
+<th>STRUCT</th>
+<th>UDT</th>
+<th>DAYTIME</th>
+<th>YEARMONTH</th>
+</tr>
+<tr>
+<td rowSpan="4">HiveUDAFFunction</td>
+<td rowSpan="4"> </td>
+<td rowSpan="4">Hive user defined aggregate function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance</td>
+<td rowSpan="4">None</td>
+<td rowSpan="2">aggregation</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td rowSpan="2">reduction</td>
+<td>param</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types UDT, DAYTIME, YEARMONTH</em></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+</tr>
 </table>
 
 ## Casting
diff --git a/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java
new file mode 100644
index 00000000000..750679181e9
--- /dev/null
+++ b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.tests.udf.hive;
+
+import java.util.ArrayList;
+
+import ai.rapids.cudf.ColumnVector;
+import ai.rapids.cudf.DType;
+import ai.rapids.cudf.GroupByAggregation;
+import ai.rapids.cudf.GroupByAggregationOnColumn;
+import ai.rapids.cudf.Scalar;
+import com.nvidia.spark.RapidsSimpleGroupByAggregation;
+import com.nvidia.spark.RapidsUDAF;
+
+import com.nvidia.spark.RapidsUDAFGroupByAggregation;
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.spark.sql.types.DataType;
+
+import static org.apache.spark.sql.types.DataTypes.IntegerType;
+import static org.apache.spark.sql.types.DataTypes.LongType;
+
+/** Used by hive_udaf_test */
+@SuppressWarnings("deprecation")
+public class IntLongAverageHiveUDAF extends AbstractGenericUDAFResolver implements RapidsUDAF {
+  // ===== CPU Hive UDAF Implementation =====
+  // Builds the CPU evaluator; exactly one argument of Hive type 'int' or 'long' is accepted.
+  @Override
+  public GenericUDAFEvaluator getEvaluator(TypeInfo[] args) throws SemanticException {
+    if (args.length != 1) {
+      throw new HadoopIllegalArgumentException("Exactly one argument is expected.");
+    }
+    // Non-primitive input (struct, list, ...) is left as null and rejected below, not cast.
+    PrimitiveObjectInspector.PrimitiveCategory inType = args[0] instanceof PrimitiveTypeInfo
+        ? ((PrimitiveTypeInfo) args[0]).getPrimitiveCategory() : null;
+    if (inType == PrimitiveObjectInspector.PrimitiveCategory.LONG ||
+        inType == PrimitiveObjectInspector.PrimitiveCategory.INT) {
+      return new UDAFAverageEvaluatorLong(inType == PrimitiveObjectInspector.PrimitiveCategory.INT);
+    }
+    throw new HadoopIllegalArgumentException("Only support 'long' or 'int' as input");
+  }
+
+  class AverageAggBuf extends GenericUDAFEvaluator.AbstractAggregationBuffer {
+    private long sum;   // running total of the non-null inputs and of merged partial sums
+    private long count; // number of non-null inputs plus merged partial counts
+  }
+
+  @SuppressWarnings("deprecation")
+  class UDAFAverageEvaluatorLong extends GenericUDAFEvaluator {
+    private final boolean isInt; // true: read input as int and return an int; false: long
+
+    UDAFAverageEvaluatorLong(boolean isInt) {
+      this.isInt = isInt;
+    }
+
+    transient private PrimitiveObjectInspector inputOI; // set for PARTIAL1/COMPLETE only
+    transient private StructObjectInspector tempOI; // set for the other modes (partial struct)
+
+    transient private StructField countField;
+    transient private StructField sumField;
+
+    transient private LongObjectInspector countFieldOI;
+    transient private LongObjectInspector sumFieldOI;
+
+    transient private Object[] partialRet; // reused [sum, count] holder, see terminatePartial
+
+    @Override
+    public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
+      super.init(mode, parameters);
+      assert (parameters.length == 1);
+
+      partialRet = new Object[2];
+      partialRet[0] = new LongWritable(0);
+      partialRet[1] = new LongWritable(0);
+      // Input side: raw values for PARTIAL1/COMPLETE, otherwise the partial [sum, count] struct.
+      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+        inputOI = (PrimitiveObjectInspector) parameters[0];
+      } else {
+        tempOI = (StructObjectInspector) parameters[0];
+        sumField = tempOI.getStructFieldRef("sum");
+        countField = tempOI.getStructFieldRef("count");
+        countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+        sumFieldOI = (LongObjectInspector) sumField.getFieldObjectInspector();
+      }
+
+      // Output side: the partial struct for PARTIAL1/PARTIAL2, otherwise the final average.
+      if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+        // The output of a partial aggregation is a struct containing
+        // a "long" sum and a "long" count, declared in that order, which
+        // is the same order terminatePartial uses when it fills partialRet.
+        ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+        foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        ArrayList<String> fnames = new ArrayList<String>();
+        fnames.add("sum");
+        fnames.add("count");
+        return ObjectInspectorFactory.getStandardStructObjectInspector(fnames, foi);
+      } else {
+        if (isInt) {
+          return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+        } else {
+          return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+        }
+      }
+    }
+
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() {
+      return new AverageAggBuf();
+    }
+
+    @Override
+    public void iterate(AggregationBuffer aggBuffer, Object[] parameters) throws HiveException {
+      assert (parameters.length == 1);
+      Object obj = parameters[0];
+      if (obj != null) { // null inputs change neither the sum nor the count
+        AverageAggBuf buf = (AverageAggBuf) aggBuffer;
+        buf.count += 1;
+        if (isInt) {
+          buf.sum += PrimitiveObjectInspectorUtils.getInt(obj, inputOI);
+        } else {
+          buf.sum += PrimitiveObjectInspectorUtils.getLong(obj, inputOI);
+        }
+      }
+    }
+
+    @Override
+    public Object terminatePartial(AggregationBuffer aggBuffer) throws HiveException {
+      AverageAggBuf buf = (AverageAggBuf) aggBuffer;
+      ((LongWritable) partialRet[0]).set(buf.sum); // slot 0 = "sum"
+      ((LongWritable) partialRet[1]).set(buf.count); // slot 1 = "count"
+      return partialRet; // the same array instance is handed out on every call
+    }
+
+    @Override
+    public void merge(AggregationBuffer aggBuffer, Object partial) throws HiveException {
+      if (partial != null) {
+        AverageAggBuf buf = (AverageAggBuf) aggBuffer;
+        long count = countFieldOI.get(tempOI.getStructFieldData(partial, countField));
+        buf.count += count;
+        Object sumValue = tempOI.getStructFieldData(partial, sumField);
+        if (sumValue != null) { // a partial "sum" may be null; it then contributes nothing
+          buf.sum += sumFieldOI.get(sumValue);
+        }
+      }
+    }
+
+    @Override
+    public Object terminate(AggregationBuffer aggBuffer) throws HiveException {
+      AverageAggBuf buf = (AverageAggBuf) aggBuffer;
+      if (buf.count == 0) {
+        return null; // no non-null input was seen, so the average is null
+      } else {
+        if (isInt) {
+          IntWritable result = new IntWritable(0);
+          result.set((int)(buf.sum/buf.count)); // long division, then narrowed to int
+          return result;
+        } else {
+          LongWritable result = new LongWritable(0);
+          result.set(buf.sum/buf.count); // integer division: the fraction is dropped
+          return result;
+        }
+      }
+    }
+
+    @Override
+    public void reset(AggregationBuffer aggBuffer) throws HiveException {
+      AverageAggBuf buf = (AverageAggBuf) aggBuffer;
+      buf.count = 0;
+      buf.sum = 0;
+    }
+  } // end of UDAFAverageEvaluatorLong
+
+  // ===== GPU RapidsUDAF Implementation =====
+  @Override
+  public Scalar[] getDefaultValue() {
+    // Initial buffer values in [sum, count] order. They have to line up with
+    // what updateAggregation produces, and should preferably agree with the
+    // initial state used by the CPU Hive version as well.
+    Scalar nullSum = Scalar.fromNull(DType.INT64);
+    try {
+      Scalar zeroCount = Scalar.fromLong(0L);
+      return new Scalar[]{nullSum, zeroCount};
+    } catch (Exception ex) {
+      // The sum scalar already exists at this point; close it before
+      // rethrowing so that it does not leak GPU memory.
+      nullSum.close();
+      throw ex;
+    }
+  }
+
+  @Override
+  public ColumnVector[] preProcess(int numRows, ColumnVector[] args) {
+    try (ColumnVector inputInt = args.length > 0 ? args[0] : null) { // null is skipped on close
+      if (args.length != 1) { // an empty array now reports this instead of an index error
+        throw new IllegalArgumentException("Expect only one column for preProcess.");
+      }
+      return new ColumnVector[] {inputInt.castTo(DType.INT64)}; // int or long input -> INT64
+    }
+  }
+
+  @Override
+  public ColumnVector postProcess(int numRows, ColumnVector[] args, DataType outType) {
+    ColumnVector ret = null;
+    // Final step: divide sum by count to get average (a null resource is skipped on close)
+    try (ColumnVector sumCol = args.length > 0 ? args[0] : null;
+         ColumnVector countCol = args.length > 1 ? args[1] : null) {
+      if (args.length != 2) { // reached even for a too-short array, thanks to the guards above
+        throw new IllegalArgumentException("Expect two columns for postProcess.");
+      }
+      ret = sumCol.div(countCol);
+    }
+    if (IntegerType.equals(outType)) {
+      try (ColumnVector longRet = ret) { // the intermediate quotient is closed after the cast
+        return longRet.castTo(DType.INT32);
+      }
+    } else {
+      return ret;
+    }
+  }
+
+  @Override
+  public RapidsUDAFGroupByAggregation updateAggregation() {
+    return new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses the default implementation (pass-through)
+
+      @Override
+      public Scalar[] reduce(int numRows, ColumnVector[] preStepData) {
+        if (preStepData.length != 1) {
+          throw new IllegalArgumentException("Expect only one column for update reduce.");
+        }
+        // Reduction (no group-by keys): SUM of the column and COUNT of its non-null rows
+        ColumnVector inCol = preStepData[0];
+        Scalar sum = inCol.sum();
+        try {
+          Scalar count = Scalar.fromLong(inCol.getRowCount() - inCol.getNullCount());
+          return new Scalar[]{sum, count};
+        } catch (Exception e) {
+          // 'sum' already exists here; close it so a failure does not leak it.
+          sum.close();
+          throw e;
+        }
+      }
+
+      @Override
+      public GroupByAggregationOnColumn[] aggregate(int[] inputIndices) {
+        if (inputIndices.length != 1) {
+          throw new IllegalArgumentException("Expect only one column for update aggregate.");
+        }
+        // Group-by aggregation: request SUM and COUNT over the single input column
+        int colIndex = inputIndices[0];
+        return new GroupByAggregationOnColumn[]{
+            GroupByAggregation.sum().onColumn(colIndex),
+            GroupByAggregation.count().onColumn(colIndex)
+        };
+      }
+
+      @Override
+      public ColumnVector[] postStep(ColumnVector[] aggregatedData) {
+        // cudf count() aggregate produces an integer column, so cast it to
+        // INT64; the result is then [sum, count], both matching the agg buffer type.
+        assert (aggregatedData.length == 2);
+        try (ColumnVector sumLong = aggregatedData[0];
+             ColumnVector countMaybeInt = aggregatedData[1]) {
+          ColumnVector countAsLong = countMaybeInt.castTo(DType.INT64);
+          return new ColumnVector[] {sumLong.incRefCount(), countAsLong}; // offsets the try's close
+        }
+      }
+    };
+  }
+
+  @Override
+  public RapidsUDAFGroupByAggregation mergeAggregation() {
+    return new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses the default implementation (pass-through)
+
+      @Override
+      public Scalar[] reduce(int numRows, ColumnVector[] preStepData) {
+        if (preStepData.length != 2) {
+          throw new IllegalArgumentException("Expect two columns for merge reduce.");
+        }
+        ColumnVector sumCol = preStepData[0]; // partial sums
+        ColumnVector countCol = preStepData[1]; // partial counts
+        Scalar sum = sumCol.sum();
+        try {
+          Scalar count = countCol.sum();
+          return new Scalar[]{sum, count};
+        } catch (Exception e) {
+          // 'sum' already exists here; close it so a failure does not leak it.
+          sum.close();
+          throw e;
+        }
+      }
+
+      @Override
+      public GroupByAggregationOnColumn[] aggregate(int[] inputIndices) {
+        if (inputIndices.length != 2) {
+          throw new IllegalArgumentException("Expect two columns for merge aggregate.");
+        }
+        return new GroupByAggregationOnColumn[]{
+            GroupByAggregation.sum().onColumn(inputIndices[0]), // sum of sums
+            GroupByAggregation.sum().onColumn(inputIndices[1])  // sum of counts
+        };
+      }
+
+      // "postStep" uses the default implementation (pass-through)
+    };
+  }
+
+  @Override
+  public DataType[] aggBufferTypes() {
+    return new DataType[]{LongType, LongType}; // [sum, count], both long like the GPU buffers
+  }
+}
+
diff --git a/integration_tests/src/main/python/hive_udaf_test.py b/integration_tests/src/main/python/hive_udaf_test.py
new file mode 100644
index 00000000000..2edba72ecb5
--- /dev/null
+++ b/integration_tests/src/main/python/hive_udaf_test.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from asserts import assert_gpu_and_cpu_are_equal_sql
+from data_gen import gen_df, IntegerGen, int_gen, long_gen, idfn
+from spark_session import with_spark_session, is_databricks_runtime
+from hive_udf_utils import *
+from marks import ignore_order, allow_non_gpu
+
+projected_aggs_list = [  # select-lists: average_agg alone and interleaved with max/min
+    "average_agg(i), average_agg(l)",
+    "average_agg(i), max(i), average_agg(l), min(l)",
+    "max(i), average_agg(i), min(l), average_agg(l)",
+    "max(i), min(l), average_agg(i), average_agg(l)",
+    "average_agg(i), max(i), min(l), average_agg(l)",
+    "max(i), average_agg(i), average_agg(l), min(l)",
+    "average_agg(i), average_agg(l), max(i), min(l)"
+]
+
+def hive_udaf_eval_fn(spark, data_gens):
+    load_hive_udf(spark, "average_agg",  # (re)register the UDAF under the name the SQL uses
+                  "com.nvidia.spark.rapids.tests.udf.hive.IntLongAverageHiveUDAF")
+    return gen_df(spark, data_gens)
+
+
+@ignore_order(local=True)
+@pytest.mark.parametrize("aggs", projected_aggs_list, ids=idfn)
+def test_groupby_with_hive_average_udaf(aggs):
+    with_spark_session(skip_if_no_hive)
+    # 'g' is the group-by key: values 0..50 plus nulls, so at most 52 groups
+    data_gens = [["g", IntegerGen(min_val=0, max_val=50)], ["i", int_gen], ["l", long_gen]]
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark: hive_udaf_eval_fn(spark, data_gens),
+        "groupby_hive_udaf_table",
+        "SELECT g, {} FROM groupby_hive_udaf_table GROUP BY g".format(aggs),
+        conf={"spark.sql.catalogImplementation": "hive"})
+
+
+@ignore_order(local=True)
+@pytest.mark.parametrize("aggs", projected_aggs_list, ids=idfn)
+def test_reduction_with_hive_average_udaf(aggs):
+    with_spark_session(skip_if_no_hive)
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark: hive_udaf_eval_fn(spark, [["i", int_gen], ["l", long_gen]]),
+        "reduction_hive_udaf_table",
+        "SELECT {} FROM reduction_hive_udaf_table".format(aggs),  # no GROUP BY: whole-table agg
+        conf={"spark.sql.catalogImplementation": "hive"})
+
+
+@ignore_order(local=True)
+@pytest.mark.skipif(is_databricks_runtime(), reason="Databricks does not support mixed aggs")
+@allow_non_gpu("ObjectHashAggregateExec", "ProjectExec")
+@pytest.mark.parametrize("aggs", projected_aggs_list[0:2], ids=idfn)
+@pytest.mark.parametrize("repl_mode", ["partial", "final"], ids=idfn)
+def test_groupby_with_mixed_hive_average_udaf(aggs, repl_mode):
+    with_spark_session(skip_if_no_hive)
+    # 'g' is the group-by key: values 0..50 plus nulls, so at most 52 groups
+    data_gens = [["g", IntegerGen(min_val=0, max_val=50)], ["i", int_gen], ["l", long_gen]]
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark: hive_udaf_eval_fn(spark, data_gens),
+        "groupby_hive_udaf_table",
+        "SELECT g, {} FROM groupby_hive_udaf_table GROUP BY g".format(aggs),
+        conf={"spark.sql.catalogImplementation": "hive",
+              "spark.rapids.sql.hashAgg.replaceMode": repl_mode})  # NOTE(review): presumably limits GPU replacement to this agg stage - confirm
diff --git a/integration_tests/src/main/python/hive_udf_utils.py b/integration_tests/src/main/python/hive_udf_utils.py
new file mode 100644
index 00000000000..ae3e863988e
--- /dev/null
+++ b/integration_tests/src/main/python/hive_udf_utils.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from conftest import skip_unless_precommit_tests
+
+
+def drop_udf(spark, udf_name):
+    spark.sql(f"DROP TEMPORARY FUNCTION IF EXISTS `{udf_name}`")  # name is backtick-quoted
+
+
+def skip_if_no_hive(spark):
+    if spark.conf.get("spark.sql.catalogImplementation") != "hive":  # session lacks Hive catalog
+        skip_unless_precommit_tests('The Spark session does not have Hive support')
+
+
+def load_hive_udf(spark, udf_name, udf_class):
+    drop_udf(spark, udf_name)  # remove any earlier registration of the same name first
+    # If the UDF fails to load this throws AnalysisException; check that the UDF class is on the classpath
+    spark.sql(f"CREATE TEMPORARY FUNCTION `{udf_name}` AS '{udf_class}'")
diff --git a/integration_tests/src/main/python/row-based_udf_test.py b/integration_tests/src/main/python/row-based_udf_test.py
index e849a87b10e..d2a3017273d 100644
--- a/integration_tests/src/main/python/row-based_udf_test.py
+++ b/integration_tests/src/main/python/row-based_udf_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,19 +17,8 @@
 from asserts import assert_gpu_and_cpu_are_equal_sql
 from data_gen import *
 from spark_session import with_spark_session, is_spark_350_or_later
-from conftest import skip_unless_precommit_tests
+from hive_udf_utils import *
 
-def drop_udf(spark, udfname):
-    spark.sql("DROP TEMPORARY FUNCTION IF EXISTS {}".format(udfname))
-
-def skip_if_no_hive(spark):
-    if spark.conf.get("spark.sql.catalogImplementation") != "hive":
-        skip_unless_precommit_tests('The Spark session does not have Hive support')
-
-def load_hive_udf(spark, udfname, udfclass):
-    drop_udf(spark, udfname)
-    # if UDF failed to load, throws AnalysisException, check if the udf class is in the class path
-    spark.sql("CREATE TEMPORARY FUNCTION {} AS '{}'".format(udfname, udfclass))
 
 @pytest.mark.xfail(condition=is_spark_350_or_later(),
                    reason='https://github.com/NVIDIA/spark-rapids/issues/9064')
diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java
new file mode 100644
index 00000000000..3b5aa06742a
--- /dev/null
+++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark;
+
+import ai.rapids.cudf.GroupByAggregationOnColumn;
+
+/**
+ * A group-by aggregation step that is expressed with built-in CUDF aggregation
+ * operations. This handles the most common aggregation patterns and provides
+ * the best performance.
+ */
+public interface RapidsSimpleGroupByAggregation extends RapidsUDAFGroupByAggregation {
+  /**
+   * The main aggregation step, built from CUDF group-by aggregations.
+   *
+   * @param inputIndices the indices of the columns that are the inputs of this
+   *                     aggregation.
+   * @return An array of CUDF {@code GroupByAggregationOnColumn} instances.
+   */
+  GroupByAggregationOnColumn[] aggregate(int[] inputIndices);
+}
diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java
new file mode 100644
index 00000000000..885e621ab7f
--- /dev/null
+++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark;
+
+import ai.rapids.cudf.ColumnVector;
+import ai.rapids.cudf.Scalar;
+import org.apache.spark.sql.types.DataType;
+
+/**
+ * An interface for a GPU-accelerated User Defined Aggregate Function (UDAF).
+ * This provides the necessary methods to perform distributed group-by and
+ * reduction aggregations using CUDF.
+ */
+public interface RapidsUDAF {
+
+  /**
+   * Provides an array of default values for the aggregation result. This is
+   * used when a reduction aggregation does not have any rows to aggregate.
+   * <br/>
+   * Rapids will close these Scalars after converting them to columns.
+   * <br/>
+   * @return An array of cudf Scalars representing the output of the
+   *         updateAggregation stage of processing. This output may still
+   *         be merged with the output of other tasks.
+   */
+  Scalar[] getDefaultValue();
+
+  /**
+   * A pre-processing step that transforms the input ColumnVector arguments.
+   * This method is similar to a regular RapidsUDF but returns an array of
+   * ColumnVectors. By default, this is a no-op and will just return the
+   * arguments passed in.
+   * <br/>
+   * Users should close the input columns to avoid a GPU memory leak, while the
+   * returned columns will be closed by Rapids automatically.
+   *
+   * @param numRows The number of rows to process. This is for cases
+   *               like a {@code COUNT(*)}, where there may be no arguments to a UDAF.
+   *               This is not common.
+   * @param args An array of ColumnVector arguments.
+   * @return An array of ColumnVectors representing the pre-processed data.
+   */
+  default ColumnVector[] preProcess(int numRows, ColumnVector[] args) {
+    return args;
+  }
+
+  /**
+   * This method returns a RapidsUDAFGroupByAggregation that defines the
+   * logic for the initial aggregation. The preProcess method will be called
+   * first, and its output will then be processed by the
+   * RapidsUDAFGroupByAggregation that this method returns.
+   * <br/>
+   * @return A RapidsUDAFGroupByAggregation that defines the aggregation
+   * logic.
+   */
+  RapidsUDAFGroupByAggregation updateAggregation();
+
+  /**
+   * This method returns a RapidsUDAFGroupByAggregation that defines how to
+   * merge two sets of aggregation results. This is used in distributed
+   * aggregation scenarios where intermediate results from different
+   * partitions are combined.
+   * <br/>
+   * @return A RapidsUDAFGroupByAggregation that defines the merge logic.
+   */
+  RapidsUDAFGroupByAggregation mergeAggregation();
+
+  /**
+   * A post-processing step that takes the result of the final aggregation
+   * and performs any necessary transformations before returning the final
+   * result. This method returns a single ColumnVector, which is the final
+   * result of the aggregation.
+   * <br/>
+   * Users should close the input columns to avoid a GPU memory leak, but the
+   * returned column will be closed by Rapids automatically.
+   * <br/>
+   * @param numRows The number of rows in the aggregated data.
+   * @param args An array of ColumnVector arguments from the final aggregation step.
+   * @param outType The final output data type of this UDAF.
+   * @return A single ColumnVector representing the final UDAF result.
+   */
+  ColumnVector postProcess(int numRows, ColumnVector[] args, DataType outType);
+
+  /**
+   * Data types of the aggregate buffer.
+   * <br/>
+   * These should match the "bufferSchema" of "UserDefinedAggregateFunction", or the
+   * "bufferEncoder" of "Aggregator" in Spark. Otherwise, data corruption is likely
+   * when only part of this aggregation falls back to the CPU, e.g. the partial
+   * aggregate runs on the CPU but the final aggregate runs on the GPU, or vice versa.
+   * This is rare, but possible.
+   */
+  DataType[] aggBufferTypes();
+}
diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java
new file mode 100644
index 00000000000..8ca441b65af
--- /dev/null
+++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark;
+
+import ai.rapids.cudf.ColumnVector;
+import ai.rapids.cudf.Scalar;
+
+/**
+ * Base interface for GPU-accelerated UDAF aggregation implementations. This provides
+ * the contract for different aggregation strategies.
+ * <p/>
+ * Please do not implement this interface directly. Implement one of its supported
+ * sub-interfaces instead; currently the only one is {@code RapidsSimpleGroupByAggregation}.
+ * More may be added in the future.
+ */
+public interface RapidsUDAFGroupByAggregation {
+  /**
+   * An optional pre-step for the aggregation. By default, this is a no-op
+   * and will just return the arguments passed in.
+   * <br/>
+   * Users should close the input columns to avoid a GPU memory leak, but the
+   * returned columns will be closed by Rapids automatically.
+   * <br/>
+   * @param numRows The number of rows.
+   * @param args An array of input ColumnVectors.
+   * @return An array of ColumnVectors.
+   */
+  default ColumnVector[] preStep(int numRows, ColumnVector[] args) {
+    return args;
+  }
+
+  /**
+   * Performs a reduction on the pre-step output (no keys). The
+   * output of this will be turned into a ColumnVector and possibly
+   * combined with other rows before being processed further.
+   * <br/>
+   * Rapids will close both the input columns and the returned Scalars automatically.
+   *
+   * @param numRows The number of rows to process.
+   * @param preStepData The output from the preStep method.
+   * @return An array of cudf Scalars representing the reduced data.
+   */
+  Scalar[] reduce(int numRows, ColumnVector[] preStepData);
+
+  /**
+   * A post-process step for the aggregation. It takes the output of the
+   * aggregations and performs any processing needed to make it match the
+   * input to the merge aggregation. By default, this is a no-op.
+   * <br/>
+   * Users should close the input columns to avoid a GPU memory leak, but the
+   * returned columns will be closed by Rapids automatically.
+   *
+   * @param aggregatedData The output from the aggregation step. These columns
+   *                      should be closed when no longer needed.
+   * @return An array of ColumnVectors compatible with the merge step.
+   */
+  default ColumnVector[] postStep(ColumnVector[] aggregatedData) {
+    return aggregatedData;
+  }
+}
diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java
index edb22a0082f..7b9cc76c9a4 100644
--- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java
+++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java
@@ -848,6 +848,8 @@ public static GpuColumnVector from(Scalar scalar, int count, DataType sparkType)
 
   /**
    * Creates a GpuColumnVector from a GpuScalar
+   * This is not recommended and will be deprecated in the future, use
+   * "from(GpuScalar scalar, int count)" instead.
    *
    * @param scalar the input GpuScalar
    * @param count the row number of the output column
@@ -855,7 +857,23 @@ public static GpuColumnVector from(Scalar scalar, int count, DataType sparkType)
    * @return a GpuColumnVector. It should be closed to avoid memory leak.
    */
   public static GpuColumnVector from(GpuScalar scalar, int count, DataType sparkType) {
-    return from(ai.rapids.cudf.ColumnVector.fromScalar(scalar.getBase(), count), sparkType);
+    if (!scalar.dataType().equals(sparkType)) { // by value: DataTypes are not all singletons
+      throw new IllegalArgumentException("The given spark type(" + sparkType +
+          ") does not match the GpuScalar type(" + scalar.dataType() + ").");
+    }
+    return from(scalar, count);
+  }
+
+  /**
+   * Creates a GpuColumnVector from a GpuScalar, using the scalar's own Spark type.
+   *
+   * @param scalar the input GpuScalar
+   * @param count the number of rows of the output column
+   * @return a GpuColumnVector. It should be closed to avoid a memory leak.
+   */
+  public static GpuColumnVector from(GpuScalar scalar, int count) {
+    return from(ai.rapids.cudf.ColumnVector.fromScalar(scalar.getBase(), count),
+        scalar.dataType());
   }
 
   /**
@@ -1002,6 +1020,27 @@ public static ColumnarBatch dropColumns(ColumnarBatch cb, boolean[] dropList) {
     return incRefCounts(ret);
   }
 
+  /**
+   * Slice the columns from the given columnar batch at the range of [start, end).
+   * 'start' must be in '[0, numColumns]' and 'end' >= 'start', otherwise an
+   * IllegalArgumentException is thrown. An 'end' beyond 'numColumns' is clamped to it.
+   * 'start == numColumns' or 'start == end' returns a batch with no columns.
+   */
+  public static ColumnarBatch sliceColumns(ColumnarBatch cb, int start, int end) {
+    int numColumns = cb.numCols();
+    if (0 <= start && start <= numColumns && start <= end) {
+      int numRows = cb.numRows();
+      int realEnd = Math.min(end, numColumns);
+      ArrayList<ColumnVector> columns = new ArrayList<>();
+      for (int i = start; i < realEnd; i++) {
+        columns.add(cb.column(i));
+      }
+      ColumnarBatch ret = new ColumnarBatch(columns.toArray(new ColumnVector[0]), numRows);
+      return incRefCounts(ret);
+    }
+    throw new IllegalArgumentException("Invalid 'start' or 'end'");
+  }
+
   /**
    * Get the underlying Spark compatible columns from the batch.  This does not increment any
    * reference counts so if you want to use these columns after the batch is closed
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
index 358a869033d..9ad6ce3cea9 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
@@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.execution.{ExplainUtils, SortExec, SparkPlan}
 import org.apache.spark.sql.execution.aggregate.{BaseAggregateExec, HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.rapids.aggregate.{CpuToGpuAggregateBufferConverter, CudfAggregate, GpuAggregateExpression, GpuToCpuAggregateBufferConverter}
+import org.apache.spark.sql.rapids.aggregate.{AdvAggTypeUtils, AdvancedCudfAggregate, CpuToGpuAggregateBufferConverter, CudfAggregate, GpuAdvancedAggregateFunction, GpuAggregateExpression, GpuToCpuAggregateBufferConverter}
 import org.apache.spark.sql.rapids.execution.{GpuBatchSubPartitioner, GpuShuffleMeta, TrampolineUtil}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.vectorized.ColumnarBatch
@@ -305,6 +305,9 @@ object AggregateUtils extends Logging {
 
     repartitionHappened
   }
+
+  type AdvancedStep = (Int, Array[GpuColumnVector]) => Array[GpuColumnVector]
+  type AdvancedAggHandler = (AdvancedStep, Int, Int)
 }
 
 /** Utility class to hold all of the metrics related to hash aggregation */
@@ -398,6 +401,10 @@ class AggHelper(
   private val postStep = new mutable.ArrayBuffer[Expression]()
   private val postStepAttr = new mutable.ArrayBuffer[Attribute]()
 
+  private val advPreStepArgs = new mutable.ArrayBuffer[Seq[Expression]]()
+  private val advPreSteps = new mutable.ArrayBuffer[AggregateUtils.AdvancedStep]()
+  private val advCudfAggregates = new mutable.ArrayBuffer[(AdvancedCudfAggregate, Int)]()
+
   // we add the grouping expression first, which should bind as pass-through
   if (forceMerge) {
     // a grouping expression can do actual computation, but we cannot do that computation again
@@ -409,91 +416,181 @@ class AggHelper(
   }
   postStep ++= groupingAttributes
   postStepAttr ++= groupingAttributes
-  postStepDataTypes ++=
-    groupingExpressions.map(_.dataType)
+  postStepDataTypes ++= groupingExpressions.map(_.dataType)
+
+  private def addAdvancedAgg(advAgg: AdvancedCudfAggregate,
+      aggOrd: Int,
+      advPreStep: AggregateUtils.AdvancedStep,
+      advPreArgs: Seq[Expression]): Unit = {
+    // Pre-processing of an advanced agg is done in two steps:
+    //   1) project "advPreArgs" to produce the arguments of "advPreStep",
+    //   2) call the given "advPreStep" function on those arguments.
+    advPreStepArgs += advPreArgs
+    advPreSteps += advPreStep
+    advCudfAggregates += ((advAgg, aggOrd))
+  }
 
   private var ix = groupingAttributes.length
-  for (aggExp <- aggregateExpressions) {
+  aggregateExpressions.zipWithIndex.foreach { case (aggExp, ord) =>
     val aggFn = aggExp.aggregateFunction
     if ((aggExp.mode == Partial || aggExp.mode == Complete) && !forceMerge) {
-      val ordinals = (ix until ix + aggFn.updateAggregates.length)
-      aggOrdinals ++= ordinals
-      ix += ordinals.length
-      val updateAggs = aggFn.updateAggregates
-      postStepDataTypes ++= updateAggs.map(_.dataType)
-      cudfAggregates ++= updateAggs
-      preStep ++= aggFn.inputProjection
-      postStep ++= aggFn.postUpdate
-      postStepAttr ++= aggFn.postUpdateAttr
+      aggFn match {
+        case advFn: GpuAdvancedAggregateFunction =>
+          addAdvancedAgg(advFn.updateAggregate(), ord, advFn.preProcessAndClose,
+            aggFn.inputProjection)
+        case _ =>
+          val ordinals = (ix until ix + aggFn.updateAggregates.length)
+          aggOrdinals ++= ordinals
+          ix += ordinals.length
+          val updateAggs = aggFn.updateAggregates
+          postStepDataTypes ++= updateAggs.map(_.dataType)
+          cudfAggregates ++= updateAggs
+          preStep ++= aggFn.inputProjection
+          postStep ++= aggFn.postUpdate
+          postStepAttr ++= aggFn.postUpdateAttr
+      }
     } else {
-      val ordinals = (ix until ix + aggFn.mergeAggregates.length)
-      aggOrdinals ++= ordinals
-      ix += ordinals.length
-      val mergeAggs = aggFn.mergeAggregates
-      postStepDataTypes ++= mergeAggs.map(_.dataType)
-      cudfAggregates ++= mergeAggs
-      preStep ++= aggFn.preMerge
-      postStep ++= aggFn.postMerge
-      postStepAttr ++= aggFn.postMergeAttr
+      aggFn match {
+        case advFn: GpuAdvancedAggregateFunction =>
+          val advMergeAgg = advFn.mergeAggregate()
+          addAdvancedAgg(advMergeAgg, ord, advMergeAgg.preStepAndClose,
+            advFn.aggBufferAttributes)
+        case _ =>
+          val ordinals = (ix until ix + aggFn.mergeAggregates.length)
+          aggOrdinals ++= ordinals
+          ix += ordinals.length
+          val mergeAggs = aggFn.mergeAggregates
+          postStepDataTypes ++= mergeAggs.map(_.dataType)
+          cudfAggregates ++= mergeAggs
+          preStep ++= aggFn.preMerge
+          postStep ++= aggFn.postMerge
+          postStepAttr ++= aggFn.postMergeAttr
+      }
     }
   }
 
+  private val advAggStart = preStep.length
+
   // a bound expression that is applied before the cuDF aggregate
   private val preStepAttributes = if (forceMerge) {
     aggBufferAttributes
   } else {
     inputAttributes
   }
-  val preStepBound = GpuBindReferences.bindGpuReferencesTiered(preStep.toList,
+
+  // From "preStep" to "postStep" (including "aggregate/reduce"), the aggregates are
+  // split into two parts, built-in ones and advanced ones, which are processed
+  // separately. Their outputs are then combined into a single batch in which the
+  // built-in columns always come before the advanced ones. For example, given the 3
+  // aggregates
+  //     "max(a), advanced(b), min(a)",
+  // the columns of the "preProcess" output batch are laid out as follows (assuming
+  // "advanced(b)" produces two columns):
+  //   | group columns | col_max_a | col_min_a | col1_advanced_b | col2_advanced_b |
+  //
+  // The output batches of "reduce/aggregate" and "postStep" have the same layout.
+  val preStepBound = GpuBindReferences.bindGpuReferencesTiered(
+    preStep.toList ++ advPreStepArgs.flatten, // Append the arguments of advanced aggs
     preStepAttributes.toList, conf)
 
   // a bound expression that is applied after the cuDF aggregate
   private val postStepBound = GpuBindReferences.bindGpuReferencesTiered(postStep.toList,
     postStepAttr.toList, conf)
 
+  private val advPreStepAndArgLens = advPreSteps.zip(advPreStepArgs.map(_.length))
+
+  /**
+   * Perform the "preStep" of the advanced aggregates, and return the combined
+   * result along with the number of output columns of every advanced aggregate.
+   * The input batch "cb" contains the pre-processed columns of the built-in
+   * aggregates followed by the argument columns of the advanced aggregates.
+   *
+   * The returned column counts are used to build the input for each advanced
+   * aggregate in the following "reduce" or "aggregate" operation. For example,
+   *   input:      | b | c |
+   *   agg:      advanced(b), advanced(c)
+   *   output:    | b_out1 | b_out2 | c_out1 |,     {2, 1}
+   *
+   * "advanced(b)" produces 2 columns while "advanced(c)" returns only one column.
+   */
+  private def preProcessWithAdvancedAggsAndClose(cb: ColumnarBatch): (ColumnarBatch, Seq[Int]) = {
+    val cols = GpuColumnVector.extractColumns(cb)
+    closeOnExcept(new ArrayBuffer[GpuColumnVector]()) { outCols =>
+      // 1) Move the already pre-processed (built-in) columns to the output as they are.
+      outCols ++= cols.slice(0, advAggStart)
+      // 2) Extract the argument columns and process them by the advanced aggregates.
+      // 3) Append the results to the output
+      val argsCols = cols.slice(advAggStart, cols.length)
+      closeOnExcept(argsCols) { _ =>
+        var idx = 0
+        val outLens = new ArrayBuffer[Int]()
+        advPreStepAndArgLens.foreach { case (advPreProcess, argsLen) =>
+          val endIdx = idx + argsLen
+          // advPreProcess is expected to close its input columns ("args").
+          val args = argsCols.slice(idx, endIdx)
+          (idx until endIdx).foreach { i =>
+            argsCols(i) = null // Avoid duplicate close on exceptions
+          }
+          val ret = advPreProcess(cb.numRows(), args)
+          outCols ++= ret
+          outLens += ret.length
+          idx = endIdx
+        }
+        require(idx == argsCols.length) // all the columns should be consumed
+        (new ColumnarBatch(outCols.toArray, cb.numRows()), outLens.toSeq)
+      }
+    }
+  }
+
   /**
    * Apply the "pre" step: preMerge for merge, or pass-through in the update case
    *
    * @param toAggregateBatch - input (to the agg) batch from the child directly in the
    *                         merge case, or from the `inputProjection` in the update case.
-   * @return a pre-processed batch that can be later cuDF aggregated
+   * @param metrics - the GpuHashAggregateMetrics for this aggregation.
+   * @return a pre-processed batch that can be later cuDF aggregated, along with the
+   *         output sizes for advanced aggregates.
    */
   def preProcess(
       toAggregateBatch: ColumnarBatch,
-      metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = {
+      metrics: GpuHashAggregateMetrics): (SpillableColumnarBatch, Seq[Int]) = {
     val inputBatch = SpillableColumnarBatch(toAggregateBatch,
       SpillPriorities.ACTIVE_ON_DECK_PRIORITY)
 
     val projectedCb = NvtxRegistry.AGG_PRE_PROCESS {
       preStepBound.projectAndCloseWithRetrySingleBatch(inputBatch)
     }
-    SpillableColumnarBatch(
-      projectedCb,
-      SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+    val (retCb, outLens) = if (advPreStepAndArgLens.nonEmpty) {
+      preProcessWithAdvancedAggsAndClose(projectedCb)
+    } else {
+      (projectedCb, Seq.empty)
+    }
+    (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens)
   }
 
-  def aggregate(preProcessed: ColumnarBatch, numAggs: GpuMetric): ColumnarBatch = {
+  def aggregate(preProcessed: ColumnarBatch,
+      advArgLens: Seq[Int], numAggs: GpuMetric): (ColumnarBatch, Seq[Int]) = {
     val ret = if (groupingOrdinals.nonEmpty) {
-      performGroupByAggregation(preProcessed)
+      performGroupByAggregation(preProcessed, advArgLens)
     } else {
-      performReduction(preProcessed)
+      performReduction(preProcessed, advArgLens)
     }
     numAggs += 1
     ret
   }
 
   def aggregateWithoutCombine(metrics: GpuHashAggregateMetrics,
-      preProcessed: Iterator[SpillableColumnarBatch]): Iterator[SpillableColumnarBatch] = {
+      preProcessed: Iterator[(SpillableColumnarBatch, Seq[Int])]
+  ): Iterator[(SpillableColumnarBatch, Seq[Int])] = {
     val computeAggTime = metrics.computeAggTime
     val opTime = metrics.opTime
     val numAggs = metrics.numAggOps
-    preProcessed.flatMap { sb =>
+    preProcessed.flatMap { case (sb, argLens) =>
       withRetry(sb, splitSpillableInHalfByRows) { preProcessedAttempt =>
         NvtxIdWithMetrics(NvtxRegistry.COMPUTE_AGGREGATE, computeAggTime, opTime) {
           withResource(preProcessedAttempt.getColumnarBatch()) { cb =>
-            SpillableColumnarBatch(
-              aggregate(cb, numAggs),
-              SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+            val (retCb, outLens) = aggregate(cb, argLens, numAggs)
+            (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens)
           }
         }
       }
@@ -502,26 +599,28 @@ class AggHelper(
 
   def aggregate(
       metrics: GpuHashAggregateMetrics,
-      preProcessed: SpillableColumnarBatch): SpillableColumnarBatch = {
+      preProcessed: SpillableColumnarBatch,
+      advArgLens: Seq[Int]): (SpillableColumnarBatch, Seq[Int]) = {
     val numAggs = metrics.numAggOps
     val aggregatedSeq =
       withRetry(preProcessed, splitSpillableInHalfByRows) { preProcessedAttempt =>
         withResource(preProcessedAttempt.getColumnarBatch()) { cb =>
-          SpillableColumnarBatch(
-            aggregate(cb, numAggs),
-            SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+          val (retCb, outLens) = aggregate(cb, advArgLens, numAggs)
+          (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens)
         }
       }.toSeq
 
     // We need to merge the aggregated batches into 1 before calling post process,
     // if the aggregate code had to split on a retry
     if (aggregatedSeq.size > 1) {
-      val concatted = concatenateBatchesWithRetry(metrics, aggregatedSeq)
+      val concatted = concatenateBatchesWithRetry(metrics, aggregatedSeq.map(_._1))
       withRetryNoSplit(concatted) { attempt =>
+        val outLens = aggregatedSeq.head._2
         withResource(attempt.getColumnarBatch()) { cb =>
-          SpillableColumnarBatch(
-            aggregate(cb, numAggs),
+          val scb = SpillableColumnarBatch(
+            aggregate(cb, outLens, numAggs)._1,
             SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+          (scb, outLens)
         }
       }
     } else {
@@ -530,24 +629,39 @@ class AggHelper(
   }
 
   /**
-   * Invoke reduction functions as defined in each `CudfAggreagte`
+   * Invoke reduction functions as defined in each `CudfAggregate`
    *
    * @param preProcessed - a batch after the "pre" step
-   * @return
+   * @param advArgLens - argument sizes of advanced aggregates.
+   * @return a reduced batch and the output sizes of advanced aggregates.
    */
-  def performReduction(preProcessed: ColumnarBatch): ColumnarBatch = {
+  def performReduction(
+      preProcessed: ColumnarBatch,
+      advArgLens: Seq[Int]): (ColumnarBatch, Seq[Int]) = {
+    val reduceRowNum = 1
     NvtxRegistry.AGG_REDUCE {
-      val cvs = mutable.ArrayBuffer[GpuColumnVector]()
-      cudfAggregates.zipWithIndex.foreach { case (cudfAgg, ix) =>
-        val aggFn = cudfAgg.reductionAggregate
+      closeOnExcept(new mutable.ArrayBuffer[GpuColumnVector]()) { cvs =>
         val cols = GpuColumnVector.extractColumns(preProcessed)
-        val reductionCol = cols(aggOrdinals(ix))
-        withResource(aggFn(reductionCol.getBase)) { res =>
-          cvs += GpuColumnVector.from(
-            cudf.ColumnVector.fromScalar(res, 1), cudfAgg.dataType)
+        cudfAggregates.zip(aggOrdinals).foreach { case (cudfAgg, ix) =>
+          withResource(cudfAgg.reductionAggregate(cols(ix).getBase)) { res =>
+            cvs += GpuColumnVector.from(res, reduceRowNum, cudfAgg.dataType)
+          }
         }
+
+        // Process the advanced aggregates
+        var accLen = advAggStart
+        val outLens = new mutable.ArrayBuffer[Int]()
+        advCudfAggregates.zip(advArgLens).foreach { case ((advAgg, _), argLen) =>
+          val argCols = (accLen until accLen + argLen).map(cols).toArray
+          cvs ++= withResource(advAgg.reduce(preProcessed.numRows, argCols)) { sas =>
+            // Convert a scalar to a one row column for returning
+            outLens += sas.length
+            accLen += argLen
+            sas.safeMap(GpuColumnVector.from(_, reduceRowNum))
+          }
+        }
+        (new ColumnarBatch(cvs.toArray, reduceRowNum), outLens.toSeq)
       }
-      new ColumnarBatch(cvs.toArray, 1)
     }
   }
 
@@ -555,9 +669,12 @@ class AggHelper(
    * Used to produce a group-by aggregate
    *
    * @param preProcessed the batch after the "pre" step
-   * @return a Table that has been cuDF aggregated
+   * @param advArgLens - argument sizes of advanced aggregates.
+   * @return a Table that has been cuDF aggregated, along with the
+   *         output sizes for advanced aggregates.
    */
-  def performGroupByAggregation(preProcessed: ColumnarBatch): ColumnarBatch = {
+  def performGroupByAggregation(preProcessed: ColumnarBatch,
+      advArgLens: Seq[Int]): (ColumnarBatch, Seq[Int]) = {
     NvtxRegistry.AGG_GROUPBY {
       withResource(GpuColumnVector.from(preProcessed)) { preProcessedTbl =>
         val groupOptions = cudf.GroupByOptions.builder()
@@ -569,18 +686,128 @@ class AggHelper(
           case (cudfAgg, ord) => cudfAgg.groupByAggregate.onColumn(ord)
         }
 
+        // process advanced aggregates
+        var accArgStart = advAggStart
+        val advOutLens = new ArrayBuffer[Int]()
+        val advAggsOnColumns = advCudfAggregates.zip(advArgLens).flatMap {
+          case ((advAgg, _), argLen) =>
+            val ret = if (advAgg.supportAdvanced) {
+              // Should not come here
+              throw new UnsupportedOperationException("Advanced aggregate is " +
+                "not supported yet")
+            } else {
+              advAgg.aggregate(Array.range(accArgStart, accArgStart + argLen))
+            }
+            accArgStart += argLen
+            advOutLens += ret.length
+            ret
+        }
+
         // perform the aggregate
         val aggTbl = preProcessedTbl
           .groupBy(groupOptions, groupingOrdinals: _*)
-          .aggregate(cudfAggsOnColumn.toSeq: _*)
+          .aggregate((cudfAggsOnColumn ++ advAggsOnColumns).toSeq: _*)
 
         withResource(aggTbl) { _ =>
-          GpuColumnVector.from(aggTbl, postStepDataTypes.toArray)
+          // The output types of advanced aggs cannot be predicted, so they are
+          // inferred from the output columns instead.
+          val advAggTypes = (postStepDataTypes.length until aggTbl.getNumberOfColumns).map {
+            advColIx => AdvAggTypeUtils.infer(aggTbl.getColumn(advColIx))
+          }
+          (GpuColumnVector.from(aggTbl, (postStepDataTypes ++ advAggTypes).toArray),
+            advOutLens.toSeq)
         }
       }
     }
   }
 
+  /** Similar to "preProcessWithAdvancedAggsAndClose", but performs the "postStep". */
+  private def postProcessWithAdvancedAggsAndClose(
+      scb: SpillableColumnarBatch,
+      advArgLens: Seq[Int]): ColumnarBatch = {
+    // 1) Split the argument columns from the built-in aggregated columns
+    val (postedCb, argsCb) = withResource(scb) { _ =>
+      withResource(scb.getColumnarBatch()) { cb =>
+        // 2) Perform the post-process for the built-in aggregates.
+        val nonArgs = SpillableColumnarBatch(
+          GpuColumnVector.sliceColumns(cb, 0, advAggStart),
+          SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+        closeOnExcept(postStepBound.projectAndCloseWithRetrySingleBatch(nonArgs)) { proCb =>
+          (proCb, GpuColumnVector.sliceColumns(cb, advAggStart, cb.numCols()))
+        }
+      }
+    }
+    // 3) Perform the post-process for the advanced aggregates.
+    val outCols = new ArrayBuffer[Array[GpuColumnVector]]()
+    closeOnExcept(postedCb) { _ =>
+      closeOnExcept(GpuColumnVector.extractColumns(argsCb)) { cols =>
+        var idx = 0
+        try {
+          advCudfAggregates.zip(advArgLens).foreach { case ((advAgg, _), argsLen) =>
+            val endIdx = idx + argsLen
+            val args = cols.slice(idx, endIdx)
+            (idx until endIdx).foreach { i =>
+              cols(i) = null // Avoid duplicate close on exceptions
+            }
+            // postStepAndClose is expected to close its input columns ("args").
+            outCols += advAgg.postStepAndClose(scb.numRows(), args) // NOTE(review): scb is closed
+            idx = endIdx
+          }
+          require(idx == cols.length) // all the columns should be consumed
+        } catch {
+          case t: Throwable =>
+            outCols.flatten.safeClose(t)
+            throw t
+        }
+      }
+    }
+    // 4) Put the columns back in the original order of the aggregates.
+    mergeWithOriginalOrderAndClose(postedCb, outCols.toSeq)
+  }
+
+  /**
+   * Merges the post-processed columns of the built-in aggregates ("batch") with the
+   * outputs of all the advanced aggregates ("advsCols").
+   *
+   * To simplify the processing of advanced aggregates, they are separated from the
+   * built-in ones earlier, at the "preStep", so the column order no longer matches
+   * the order of the aggregates that Spark expects.
+   *
+   * This function interleaves the two parts again, following the original order of
+   * the aggregates, so that the result is safe to return to Spark. The input columns
+   * are placed into the returned batch as they are; no ref counts are changed here.
+   */
+  private def mergeWithOriginalOrderAndClose(
+      batch: ColumnarBatch,
+      advsCols: Seq[Array[GpuColumnVector]]): ColumnarBatch = {
+    closeOnExcept(new ArrayBuffer[GpuColumnVector]()) { outCols =>
+      var colIx = groupingAttributes.length
+      // first, group columns, move to out directly
+      outCols ++= (0 until colIx).map(batch.column(_).asInstanceOf[GpuColumnVector])
+      var aggIx = 0
+      advCudfAggregates.zip(advsCols).foreach { case ((_, advOrd), advCols) =>
+        require(aggIx <= advOrd)
+        // 1) move all the built-in columns before this advanced agg to out
+        // NOTE(review): assumes one column per built-in agg; confirm for multi-column aggs
+        val colsNum = advOrd - aggIx
+        outCols ++= (colIx until colIx + colsNum).map( ci =>
+          batch.column(ci).asInstanceOf[GpuColumnVector]
+        )
+        colIx += colsNum
+        // 2) append the advanced columns to out
+        outCols ++= advCols
+        aggIx = advOrd + 1 // + 1 to skip the advanced agg itself
+      }
+      // move remaining ones to out
+      if (colIx < batch.numCols()) {
+        outCols ++= (colIx until batch.numCols()).map(i =>
+          batch.column(i).asInstanceOf[GpuColumnVector]
+        )
+      }
+      new ColumnarBatch(outCols.toArray, batch.numRows())
+    }
+  }
+
   /**
    * Used to produce the outbound batch from the aggregate that could be
    * shuffled or could be passed through the evaluateExpression if we are in the final
@@ -588,32 +815,32 @@ class AggHelper(
    * It takes a cuDF aggregated batch and applies the "post" step:
    * postUpdate for update, or postMerge for merge
    *
-   * @param resultBatch - cuDF aggregated batch
+   * @param aggregatedSpillable - cuDF aggregated batch
+   * @param advArgLens - argument sizes of advanced aggregates.
    * @return output batch from the aggregate
    */
   def postProcess(
       aggregatedSpillable: SpillableColumnarBatch,
+      advArgLens: Seq[Int],
       metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = {
-    val postProcessed = NvtxRegistry.AGG_POST_PROCESS {
-      postStepBound.projectAndCloseWithRetrySingleBatch(aggregatedSpillable)
+    val computeTime = metrics.computeAggTime
+    val opTime = metrics.opTime
+    val postProcessed = NvtxIdWithMetrics(NvtxRegistry.POST_PROCESS_AGG, computeTime, opTime) {
+      if (advCudfAggregates.nonEmpty) {
+        postProcessWithAdvancedAggsAndClose(aggregatedSpillable, advArgLens)
+      } else {
+        postStepBound.projectAndCloseWithRetrySingleBatch(aggregatedSpillable)
+      }
     }
     SpillableColumnarBatch(
       postProcessed,
       SpillPriorities.ACTIVE_BATCHING_PRIORITY)
   }
 
-  def postProcess(input: Iterator[SpillableColumnarBatch],
+  def postProcess(
+      input: Iterator[(SpillableColumnarBatch, Seq[Int])],
       metrics: GpuHashAggregateMetrics): Iterator[SpillableColumnarBatch] = {
-    val computeAggTime = metrics.computeAggTime
-    val opTime = metrics.opTime
-    input.map { aggregated =>
-      NvtxIdWithMetrics(NvtxRegistry.POST_PROCESS_AGG, computeAggTime, opTime) {
-        val postProcessed = postStepBound.projectAndCloseWithRetrySingleBatch(aggregated)
-        SpillableColumnarBatch(
-          postProcessed,
-          SpillPriorities.ACTIVE_BATCHING_PRIORITY)
-      }
-    }
+    input.map { case (aggregated, advArgLens) => postProcess(aggregated, advArgLens, metrics) }
   }
 }
 
@@ -627,8 +854,9 @@ object GpuAggregateIterator extends Logging {
   def aggregate(
       helper: AggHelper,
       preProcessed: SpillableColumnarBatch,
-      metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = {
-    helper.aggregate(metrics, preProcessed)
+      advArgLens: Seq[Int],
+      metrics: GpuHashAggregateMetrics): (SpillableColumnarBatch, Seq[Int]) = {
+    helper.aggregate(metrics, preProcessed, advArgLens)
   }
 
   /**
@@ -652,33 +880,25 @@ object GpuAggregateIterator extends Logging {
       // in some cases casting and in others creating a struct (MERGE_M2 for instance,
       // requires a struct)
       // OOM retry happens within the projection in preProcess
-      val preProcessed = helper.preProcess(inputBatch, metrics)
+      val (preProcessed, advArgLens) = helper.preProcess(inputBatch, metrics)
 
       // 2) perform the aggregation
       // OOM retry means we could get a list of batches
-      val aggregatedSpillable = aggregate(helper, preProcessed, metrics)
+      val (aggregatedSpillable, advLens) = aggregate(helper, preProcessed, advArgLens, metrics)
 
       // 3) a post-processing step required in some scenarios, casting or picking
       // apart a struct
-      helper.postProcess(aggregatedSpillable, metrics)
+      helper.postProcess(aggregatedSpillable, advLens, metrics)
     }
   }
 
   def computeAggregateWithoutPreprocessAndClose(
       metrics: GpuHashAggregateMetrics,
-      inputBatches: Iterator[ColumnarBatch],
+      spillableInput: Iterator[(SpillableColumnarBatch, Seq[Int])],
       helper: AggHelper): Iterator[SpillableColumnarBatch] = {
-    val computeAggTime = metrics.computeAggTime
-    val opTime = metrics.opTime
     // 1) a pre-processing step required before we go into the cuDF aggregate, This has already
     // been done and is skipped
 
-    val spillableInput = inputBatches.map { cb =>
-      withResource(new MetricRange(computeAggTime, opTime)) { _ =>
-        SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_BATCHING_PRIORITY)
-      }
-    }
-
     // 2) perform the aggregation
     // OOM retry means we could get a list of batches
     val aggregatedSpillable = helper.aggregateWithoutCombine(metrics, spillableInput)
@@ -730,10 +950,7 @@ object GpuAggFirstPassIterator {
       aggHelper: AggHelper,
       metrics: GpuHashAggregateMetrics
   ): Iterator[SpillableColumnarBatch] = {
-    val preprocessProjectIter = cbIter.map { cb =>
-      val sb = SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_ON_DECK_PRIORITY)
-      aggHelper.preStepBound.projectAndCloseWithRetrySingleBatch(sb)
-    }
+    val preprocessProjectIter = cbIter.map(aggHelper.preProcess(_, metrics))
     computeAggregateWithoutPreprocessAndClose(metrics, preprocessProjectIter, aggHelper)
   }
 }
@@ -752,7 +969,7 @@ object GpuAggFirstPassIterator {
 //     (GpuAverage => CudfSum/CudfCount)
 //  * boundResultReferences: project the result expressions Spark expects in the output.
 case class BoundExpressionsModeAggregates(
-    boundFinalProjections: Option[Seq[GpuExpression]],
+    boundFinalProjections: Option[(Seq[GpuExpression], Seq[AggregateUtils.AdvancedAggHandler])],
     boundResultReferences: Seq[Expression])
 
 object GpuAggFinalPassIterator {
@@ -776,9 +993,30 @@ object GpuAggFinalPassIterator {
       aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)
 
     val boundFinalProjections = if (modeInfo.hasFinalMode || modeInfo.hasCompleteMode) {
-      val finalProjections = groupingAttributes ++
-        aggregateExpressions.map(_.aggregateFunction.evaluateExpression)
-      Some(GpuBindReferences.bindGpuReferences(finalProjections, aggBufferAttributes))
+      var idx = groupingAttributes.length
+      val advFns = new ArrayBuffer[AggregateUtils.AdvancedAggHandler]()
+      val finalProjections = groupingAttributes ++ aggregateExpressions.flatMap { expr =>
+        val aggFn = expr.aggregateFunction
+        val ret = aggFn match {
+          case advFn: GpuAdvancedAggregateFunction =>
+            // Collect the "argument start" and "argument length" for every advanced
+            // aggregate.
+            val postProcess: AggregateUtils.AdvancedStep = (numRows, args) => {
+              Array(advFn.postProcessAndClose(numRows, args))
+            }
+            val aggBufLen = aggFn.aggBufferAttributes.length
+            advFns += ((postProcess, idx, aggBufLen))
+            idx += aggBufLen
+            // Put the arguments of the advanced agg into the output batch
+            advFn.aggBufferAttributes.asInstanceOf[Seq[Expression]]
+          case _ =>
+            idx += 1
+            Seq(aggFn.evaluateExpression)
+        }
+        ret
+      }
+      Some((GpuBindReferences.bindGpuReferences(finalProjections, aggBufferAttributes),
+        advFns.toSeq))
     } else {
       None
     }
@@ -831,9 +1069,14 @@ object GpuAggFinalPassIterator {
     val opTime = metrics.opTime
     cbIter.map { batch =>
       NvtxIdWithMetrics(NvtxRegistry.FINALIZE_AGG, aggTime, opTime) {
-        val finalBatch = boundExpressions.boundFinalProjections.map { exprs =>
-          GpuProjectExec.projectAndCloseWithRetrySingleBatch(
+        val finalBatch = boundExpressions.boundFinalProjections.map { case (exprs, advFns) =>
+          val cb = GpuProjectExec.projectAndCloseWithRetrySingleBatch(
             SpillableColumnarBatch(batch, SpillPriorities.ACTIVE_BATCHING_PRIORITY), exprs)
+          if (advFns.nonEmpty) {
+            processAdvancedAggsAndClose(cb, advFns)
+          } else {
+            cb
+          }
         }.getOrElse(batch)
         val finalSCB =
           SpillableColumnarBatch(finalBatch, SpillPriorities.ACTIVE_BATCHING_PRIORITY)
@@ -849,15 +1092,63 @@ object GpuAggFinalPassIterator {
     val opTime = metrics.opTime
     sbIter.map { sb =>
       NvtxIdWithMetrics(NvtxRegistry.FINALIZE_AGG, aggTime, opTime) {
-        val finalBatch = boundExpressions.boundFinalProjections.map { exprs =>
-          SpillableColumnarBatch(
-            GpuProjectExec.projectAndCloseWithRetrySingleBatch(sb, exprs),
-            SpillPriorities.ACTIVE_BATCHING_PRIORITY)
+        val finalBatch = boundExpressions.boundFinalProjections.map { case (exprs, advFns) =>
+          val cb = GpuProjectExec.projectAndCloseWithRetrySingleBatch(sb, exprs)
+          val mixedCb = if (advFns.nonEmpty) {
+            processAdvancedAggsAndClose(cb, advFns)
+          } else {
+            cb
+          }
+          SpillableColumnarBatch(mixedCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY)
         }.getOrElse(sb)
         reorderFinalBatch(finalBatch, boundExpressions, metrics)
       }
     }
   }
+
+  /**
+   * Runs the final step of the advanced aggregates on "inputCb" and returns a new batch.
+   *
+   * "inputCb" holds the final columns of the built-in aggregates and the argument
+   * columns of the advanced aggregates. Each entry of "processOps" carries the function
+   * to run, along with the start position and the number of its argument columns. The
+   * result columns of the function replace its argument columns in the output batch,
+   * and all the other columns are moved to the output batch untouched.
+   * NOTE(review): "inputCb" itself is not closed here, only its columns are taken over.
+   */
+  private[this] def processAdvancedAggsAndClose(inputCb: ColumnarBatch,
+      processOps: Seq[AggregateUtils.AdvancedAggHandler]): ColumnarBatch = {
+    closeOnExcept(GpuColumnVector.extractColumns(inputCb)) { cols =>
+      val outCols = new ArrayBuffer[GpuColumnVector]()
+      var idx = 0 // position of the next column of "cols" to process
+      closeOnExcept(outCols) { _ =>
+        processOps.foreach { case (advAggAndClose, inputStartPos, inputLen) =>
+          require(idx <= inputStartPos) // the handlers must be ordered and not overlap
+          val endIdx = inputStartPos + inputLen
+          // 1 Move the built-in agg columns ahead of this advanced agg to out
+          (idx until inputStartPos).foreach { i =>
+            outCols += cols(i)
+            cols(i) = null // owned by "outCols" now, avoid duplicate close on exceptions
+          }
+          val args = cols.slice(inputStartPos, endIdx) // arguments of this advanced agg
+          (inputStartPos until endIdx).foreach { i =>
+            cols(i) = null // handed over to the handler, avoid duplicate close on exceptions
+          }
+          // 2 Process the current advanced agg and append its results to out
+          outCols ++= advAggAndClose(inputCb.numRows(), args)
+          idx = endIdx
+        } // end of "processOps.foreach"
+
+        if (idx < cols.length) { // Move the columns after the last advanced agg to out
+          (idx until cols.length).foreach { i =>
+            outCols += cols(i)
+            cols(i) = null // owned by "outCols" now, avoid duplicate close on exceptions
+          }
+        }
+      } // end of "closeOnExcept(outCols)"
+      new ColumnarBatch(outCols.toArray, inputCb.numRows())
+    }
+  }
 }
 
 
@@ -1106,19 +1397,27 @@ class GpuMergeAggregateIterator(
    */
   private def generateEmptyReductionBatch(): ColumnarBatch = {
     val aggregateFunctions = aggregateExpressions.map(_.aggregateFunction)
-    val defaultValues =
-      aggregateFunctions.flatMap(_.initialValues)
-    // We have to grab the semaphore in this scenario, since this is a reduction that produces
-    // rows on the GPU out of empty input, meaning that if a batch has 0 rows, a new single
-    // row is getting created with 0 as the count (if count is the operation), and other default
-    // values.
-    GpuSemaphore.acquireIfNecessary(TaskContext.get())
-    val vecs = defaultValues.safeMap { ref =>
-      withResource(GpuScalar.from(ref.asInstanceOf[GpuLiteral].value, ref.dataType)) {
-        scalar => GpuColumnVector.from(scalar, 1, ref.dataType)
+    val defaultValues = new ArrayBuffer[GpuScalar]()
+    closeOnExcept(defaultValues) { _ =>
+      GpuSemaphore.acquireIfNecessary(TaskContext.get())
+      // We have to grab the semaphore in this scenario, since this is a reduction that produces
+      // rows on the GPU out of empty input, meaning that if a batch has 0 rows, a new single
+      // row is getting created with 0 as the count (if count is the operation), and other default
+      // values.
+      aggregateFunctions.foreach {
+        case advFunc: GpuAdvancedAggregateFunction =>
+          defaultValues ++= advFunc.defaultValues
+        case aggFunc =>
+          defaultValues ++= aggFunc.initialValues.safeMap { case GpuLiteral(any, dt) =>
+            GpuScalar(any, dt)
+          }
       }
     }
-    new ColumnarBatch(vecs.toArray, 1)
+    withResource(defaultValues) { _ =>
+      val numRows = 1
+      val vecs = defaultValues.toSeq.safeMap(GpuColumnVector.from(_, numRows))
+      new ColumnarBatch(vecs.toArray, numRows)
+    }
   }
 }
 
@@ -1487,7 +1786,6 @@ abstract class GpuTypedImperativeSupportedAggregateExecMeta[INPUT <: BaseAggrega
         allowSinglePassAgg = false,
         allowNonFullyAggregatedOutput = false,
         1)
-
     } else {
       super.convertToGpu()
     }
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 21bc17c4b5d..00e2b2bef44 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -2524,6 +2524,8 @@ object GpuOverrides extends Logging {
             a.evalType, a.udfDeterministic, a.resultId)
         }),
     GpuScalaUDFMeta.exprMeta,
+    GpuUDAFMeta.scalaUDAFMeta,
+    GpuUDAFMeta.scalaAggregatorMeta,
     expr[Rand](
       "Generate a random column with i.i.d. uniformly distributed values in [0, 1)",
       ExprChecks.projectOnly(TypeSig.DOUBLE, TypeSig.DOUBLE,
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala
index 38af5ee1113..3f3bf991fa9 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala
@@ -20,18 +20,21 @@ import java.nio.charset.Charset
 import java.time.ZoneId
 
 import com.google.common.base.Charsets
-import com.nvidia.spark.RapidsUDF
+import com.nvidia.spark.{RapidsUDAF, RapidsUDF}
 import com.nvidia.spark.rapids._
 import com.nvidia.spark.rapids.GpuUserDefinedFunction.udfTypeSig
+import org.apache.hadoop.hive.ql.exec.UDAF
+import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver
 
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, HiveTableRelation}
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
 import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.hive.{HiveGenericUDF, HiveSimpleUDF}
+import org.apache.spark.sql.hive.{HiveGenericUDF, HiveSimpleUDF, HiveUDAFFunction}
 import org.apache.spark.sql.hive.execution.HiveTableScanExec
 import org.apache.spark.sql.hive.rapids.GpuHiveTextFileUtils._
 import org.apache.spark.sql.hive.rapids.shims.HiveProviderCmdShims
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.rapids.aggregate.{AdvAggTypeUtils, CpuToGpuAggregateBufferConverter, GpuToCpuAggregateBufferConverter}
 import org.apache.spark.sql.rapids.execution.TrampolineUtil
 import org.apache.spark.sql.rapids.shims.SparkSessionUtils
 import org.apache.spark.sql.types._
@@ -128,6 +131,67 @@ class HiveProviderImpl extends HiveProviderCmdShims {
                 childExprs.map(_.convertToGpu()))
             }
           }
+        }),
+      GpuOverrides.expr[HiveUDAFFunction](
+        "Hive user defined aggregate function, the UDAF can choose to implement" +
+          " a RAPIDS accelerated interface to get better performance",
+        ExprChecks.reductionAndGroupByAgg(
+          udfTypeSig,
+          TypeSig.all,
+          repeatingParamCheck = Some(RepeatingParamCheck("param", udfTypeSig, TypeSig.all))),
+        (a, conf, p, r) => new TypedImperativeAggExprMeta[HiveUDAFFunction](a, conf, p, r) {
+
+          // The user's UDAF instance if it implements RapidsUDAF, otherwise None.
+          @scala.annotation.nowarn("msg=is deprecated")
+          private val opRapidsFunc = {
+            val hiveUDAF = if (a.isUDAFBridgeRequired) {
+              a.funcWrapper.createFunction[UDAF]()
+            } else {
+              a.funcWrapper.createFunction[AbstractGenericUDAFResolver]()
+            }
+            Option(hiveUDAF).collect { case rapidsUDAF: RapidsUDAF => rapidsUDAF }
+          }
+
+          // Keep it on the CPU if the UDAF does not provide a RapidsUDAF implementation.
+          override def tagAggForGpu(): Unit = {
+            if (opRapidsFunc.isEmpty) {
+              willNotWorkOnGpu(s"Hive UDAF ${a.name} implemented by " +
+                s"${a.funcWrapper.functionClassName} does not provide a GPU implementation")
+            }
+          }
+
+          override def aggBufferAttribute: AttributeReference = {
+            opRapidsFunc.map { rapidsUDAF =>
+              AdvAggTypeUtils.attrFromTypes(expr.name, rapidsUDAF.aggBufferTypes())
+            }.getOrElse(
+              // opRapidsFunc is None, so it will fallback to CPU, use the CPU one.
+              expr.aggBufferAttributes.head
+            )
+          }
+
+          override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+            GpuHiveUDAFFunction(
+              a.name,
+              a.funcWrapper,
+              childExprs,
+              a.nullable,
+              a.dataType,
+              a.isUDAFBridgeRequired)
+          }
+
+          // The buffer can be converted between the CPU and GPU formats by the converters below.
+          override val supportBufferConversion: Boolean = true
+
+          override def createCpuToGpuBufferConverter(): CpuToGpuAggregateBufferConverter = {
+            (child: Expression) =>
+              C2gHiveUDAFBufferTransition(child, HiveUDAFUtils.cpuAggBufferType(a),
+                aggBufferAttribute.dataType)
+          }
+
+          override def createGpuToCpuBufferConverter(): GpuToCpuAggregateBufferConverter = {
+            (child: Expression) =>
+              G2cHiveUDAFBufferTransition(child, HiveUDAFUtils.cpuAggBufferType(a))
+          }
         })
     ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
   }
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala
index 52dd46d8d80..d09b9d5ece8 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,14 +16,17 @@
 
 package org.apache.spark.sql.hive.rapids
 
-import com.nvidia.spark.RapidsUDF
+import com.nvidia.spark.{RapidsUDAF, RapidsUDF}
 import com.nvidia.spark.rapids.GpuUserDefinedFunction
-import org.apache.hadoop.hive.ql.exec.UDF
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+import org.apache.hadoop.hive.ql.exec.{UDAF, UDF}
+import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF}
 
-import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow, SafeProjection, UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.hive.HiveUDAFFunction
+import org.apache.spark.sql.rapids.aggregate.{CpuToGpuBufferTransition, GpuToCpuBufferTransition, GpuTypedUDAFFunctionBase}
+import org.apache.spark.sql.types.{DataType, StructType}
 
 /** Common implementation across Hive UDFs */
 trait GpuHiveUDFBase extends GpuUserDefinedFunction {
@@ -68,3 +71,93 @@ case class GpuHiveGenericUDF(
   override lazy val function: RapidsUDF = funcWrapper.createFunction[GenericUDF]()
       .asInstanceOf[RapidsUDF]
 }
+
+/** GPU version of the Hive UDAF whose function class is expected to implement RapidsUDAF. */
+case class GpuHiveUDAFFunction(
+    name: String,
+    funcWrapper: HiveFunctionWrapper,
+    children: Seq[Expression],
+    nullable: Boolean,
+    dataType: DataType,
+    isUDAFBridgeRequired: Boolean) extends GpuTypedUDAFFunctionBase {
+  @scala.annotation.nowarn("msg=is deprecated")
+  @transient
+  override lazy val function: RapidsUDAF = (if (isUDAFBridgeRequired) {
+    funcWrapper.createFunction[UDAF]()
+  } else {
+    funcWrapper.createFunction[AbstractGenericUDAFResolver]()
+  }).asInstanceOf[RapidsUDAF]
+}
+
+object HiveUDAFUtils {
+  // Reads the CPU agg buffer type from the private 'partialResultDataType' via reflection.
+  private[rapids] def cpuAggBufferType(hiveUDAF: HiveUDAFFunction): DataType = {
+    try {
+      val pdtName = "org$apache$spark$sql$hive$HiveUDAFFunction$$partialResultDataType"
+      hiveUDAF.getClass.getMethod(pdtName).invoke(hiveUDAF).asInstanceOf[DataType]
+    } catch {
+      case scala.util.control.NonFatal(t) => // let fatal errors (e.g. OOM) propagate as is
+        throw new IllegalStateException("Can not get the aggregate buffer type via " +
+          "'partialResultDataType' from CPU HiveUDAFFunction", t)
+    }
+  }
+}
+
+/** Converts an aggregate buffer row in the GPU format to the serialized bytes for the CPU. */
+case class G2cHiveUDAFBufferTransition(
+    child: Expression,
+    cpuBufType: DataType) extends GpuToCpuBufferTransition {
+  @transient private lazy val unsafeProj = if (cpuBufType.isInstanceOf[StructType]) {
+    // GPU always uses a struct type for agg buffer, but CPU does not, depending on the users
+    // implementation. So if a struct is used by CPU, then no need to flatten it here.
+    UnsafeProjection.create(Array(child.dataType))
+  } else {
+    UnsafeProjection.create(child.dataType.asInstanceOf[StructType].map(_.dataType).toArray)
+  }
+
+  @transient private lazy val wrapRow: InternalRow => InternalRow =
+    if (cpuBufType.isInstanceOf[StructType]) {
+      // CPU expects a single struct column
+      val wrappedRow = new GenericInternalRow(1)
+      inputRow => {
+        wrappedRow.update(0, inputRow)
+        wrappedRow
+      }
+    } else {
+      identity[InternalRow]
+    }
+
+  override protected def nullSafeEval(input: Any): Array[Byte] = {
+    unsafeProj(wrapRow(input.asInstanceOf[InternalRow])).getBytes
+  }
+}
+
+/** Converts the serialized CPU aggregate buffer bytes to a row of the GPU buffer type. */
+case class C2gHiveUDAFBufferTransition(
+    child: Expression,
+    cpuBufType: DataType,
+    gpuType: DataType) extends CpuToGpuBufferTransition {
+  override val dataType: DataType = gpuType
+
+  // GPU always uses a struct type for agg buffer, but CPU does not, depending on the users
+  // implementation. So if a struct is used by CPU, then no need to flatten it here.
+  @transient private lazy val projTypes = if (cpuBufType.isInstanceOf[StructType]) {
+    Array(gpuType)
+  } else {
+    gpuType.asInstanceOf[StructType].map(_.dataType).toArray
+  }
+  @transient private lazy val row = new UnsafeRow(projTypes.length)
+  @transient private lazy val safeProj = SafeProjection.create(projTypes) // built only once
+  @transient private lazy val objectProj: InternalRow => InternalRow =
+    if (cpuBufType.isInstanceOf[StructType]) {
+      inputRow => safeProj(inputRow).get(0, gpuType).asInstanceOf[InternalRow]
+    } else {
+      inputRow => safeProj(inputRow).copy() // keep every returned row independent
+    }
+
+  override protected def nullSafeEval(input: Any): InternalRow = {
+    val bytes = input.asInstanceOf[Array[Byte]]
+    row.pointTo(bytes, bytes.length)
+    objectProj(row)
+  }
+}
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
index 3a6e940196f..e048adaf55b 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@ package org.apache.spark.sql.rapids
 
 import java.lang.invoke.SerializedLambda
 
+import scala.reflect.ClassTag
+
 import com.nvidia.spark.RapidsUDF
 import com.nvidia.spark.rapids._
 
@@ -50,7 +52,7 @@ object GpuScalaUDFMeta {
       repeatingParamCheck =
         Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))),
     (expr, conf, p, r) => new ExprMeta(expr, conf, p, r) {
-      lazy val opRapidsFunc = GpuScalaUDF.getRapidsUDFInstance(expr.function)
+      lazy val opRapidsFunc = GpuScalaUDF.getRapidsUDFInstance[RapidsUDF](expr.function)
 
       override def tagExprForGpu(): Unit = {
         if (opRapidsFunc.isEmpty && !this.conf.isCpuBasedUDFEnabled) {
@@ -615,9 +617,9 @@ object GpuScalaUDF {
    * returning the instance if it does. The lambda wrapper that Spark applies to Java UDFs will be
    * inspected if necessary to locate the user's UDF instance.
    */
-  def getRapidsUDFInstance(function: AnyRef): Option[RapidsUDF] = {
+  def getRapidsUDFInstance[F: ClassTag](function: AnyRef): Option[F] = {
     function match {
-      case f: RapidsUDF => Some(f)
+      case f: F => Some(f)
       case f =>
         try {
           // This may be a lambda that Spark's UDFRegistration wrapped around a Java UDF instance.
@@ -632,7 +634,7 @@ object GpuScalaUDF {
             val serializedLambda = writeReplace.invoke(f).asInstanceOf[SerializedLambda]
             if (serializedLambda.getCapturedArgCount == 1) {
               serializedLambda.getCapturedArg(0) match {
-                case c: RapidsUDF => Some(c)
+                case c: F => Some(c)
                 case _ => None
               }
             } else {
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala
new file mode 100644
index 00000000000..c8ca2c15754
--- /dev/null
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.rapids.aggregate
+
+import ai.rapids.cudf.{ColumnVector, ColumnView, DType, GroupByAggregationOnColumn, Scalar}
+import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation}
+import com.nvidia.spark.rapids.{ExprChecks, ExprRule, GpuColumnVector, GpuExpression, GpuOverrides, GpuScalar, GpuUnsignedIntegerType, GpuUnsignedLongType, GpuUserDefinedFunction, ImperativeAggExprMeta, RepeatingParamCheck, TypedImperativeAggExprMeta, TypeSig}
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
+import com.nvidia.spark.rapids.RapidsPluginImplicits.{AutoCloseableProducingArray, AutoCloseableProducingSeq}
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, SafeProjection, UnsafeProjection, UnsafeRow, UserDefinedExpression}
+import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF}
+import org.apache.spark.sql.rapids.GpuScalaUDF
+import org.apache.spark.sql.types._
+
+/**
+ * Works together with a GpuAdvancedAggregateFunction to customize the aggregate computation.
+ */
+trait AdvancedCudfAggregate extends Serializable {
+  /**
+   * Do some optional pre-processing before executing the "reduce" or "aggregateXXX".
+   * The output will be fed to "reduce" or "aggregateXXX".
+   *
+   * For now this is only called for the "merge" stage of an aggregate, where it plays
+   * a role similar to "preMerge" in a GpuAggregateFunction.
+   */
+  def preStepAndClose(numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = args
+  // Similar to "reductionAggregate" in the CudfAggregate
+  def reduce(numRows: Int, preStepData: Array[GpuColumnVector]): Array[GpuScalar]
+  // Similar to "groupByAggregate" in the CudfAggregate
+  def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn]
+
+  /**
+   * If true, "aggregateAdvanced" will be executed instead of "aggregate" to get more
+   * control over the aggregate computation. Otherwise, "aggregate" is always called.
+   */
+  def supportAdvanced: Boolean = false
+
+  /**
+   * An advanced version of "aggregate" giving more control over the aggregate computation,
+   * to perform custom aggregation on data that has been grouped by keys.
+   * The data is grouped, with offsets indicating group boundaries.
+   *
+   * @param keyOffsets  A ColumnVector containing the start offset for each group.
+   *                    The end offset for group i is `keyOffsets[i+1]` (or total
+   *                    rows for the last group).
+   * @param groupedData An array of ColumnVectors containing the actual data
+   *                    columns, sorted and organized by the grouping keys.
+   * @return An array of ColumnVectors with one row per group, containing the
+   *         aggregated results.
+   */
+  def aggregateAdvanced(
+      keyOffsets: ColumnVector,
+      groupedData: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    throw new UnsupportedOperationException("Children should override this if " +
+      "setting 'supportAdvanced' to true")
+  }
+
+  /**
+   * Do some optional post-processing after executing the "reduce" or "aggregateXXX".
+   * The output will be returned to Spark, so it should match the aggregate buffer schema.
+   *
+   * It plays a role similar to "postUpdate" or "postMerge" in a GpuAggregateFunction.
+   */
+  def postStepAndClose(
+      numRows: Int,
+      aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = aggregatedData
+}
+
+/**
+ * An aggregation function that supports customizing the aggregate computations for
+ * almost all the core stages of the GPU hash aggregate process.
+ *
+ * This is designed for UDAF support on GPU, but it is not a good idea to put things named
+ * "xxxUDAFxxx" directly into the GpuHashAggregateExec.
+ */
+trait GpuAdvancedAggregateFunction extends GpuAggregateFunction with UserDefinedExpression
+    with Serializable {
+  // Similar to "initialValues" in the GpuAggregateFunction
+  def defaultValues: Array[GpuScalar]
+  // Similar to "inputProjection" in the GpuAggregateFunction
+  def preProcessAndClose(numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    args
+  }
+  // Similar to "updateAggregates" in the GpuAggregateFunction
+  def updateAggregate(): AdvancedCudfAggregate
+  // Similar to "mergeAggregates" in the GpuAggregateFunction
+  def mergeAggregate(): AdvancedCudfAggregate
+  // Similar to "evaluateExpression" in the GpuAggregateFunction
+  def postProcessAndClose(numRows: Int, args: Array[GpuColumnVector]): GpuColumnVector
+
+  override final lazy val inputProjection: Seq[Expression] = children
+  // The expression based hooks below are replaced by the methods above, so they all throw.
+  override final lazy val initialValues: Seq[Expression] = {
+    throw new UnsupportedOperationException("Gpu advanced aggregate function" +
+      " does not support 'initialValues', call 'defaultValues' instead.")
+  }
+  override final lazy val updateAggregates: Seq[CudfAggregate] = {
+    throw new UnsupportedOperationException("Gpu advanced aggregate function" +
+      " does not support 'updateAggregates', call 'updateAggregate' instead.")
+  }
+  override final lazy val mergeAggregates: Seq[CudfAggregate] = {
+    throw new UnsupportedOperationException("Gpu advanced aggregate function" +
+      " does not support 'mergeAggregates', call 'mergeAggregate' instead.")
+  }
+  override final lazy val evaluateExpression: Expression = {
+    throw new UnsupportedOperationException("Gpu advanced aggregate function" +
+      " does not support 'evaluateExpression', call 'postProcessAndClose' instead.")
+  }
+}
+
+/**
+ * The wrapper of a RapidsUDAFGroupByAggregation to interact with the GPU hash
+ * aggregate process via GPU columns or scalars.
+ */
+private[aggregate] class UDAFCudfAggregate(
+    inputAggBufferTypes: Array[DataType],
+    udafAgg: RapidsUDAFGroupByAggregation) extends AdvancedCudfAggregate {
+
+  // The UDAF type check is done by initializing this field when constructing an instance.
+  override val supportAdvanced: Boolean = udafAgg match {
+    // "RapidsAdvancedGroupByAggregation => true" will be supported in the future.
+    case _: RapidsSimpleGroupByAggregation => false
+    case u =>
+      throw new UnsupportedOperationException(s"${u.getClass} is NOT a child of " +
+        "'RapidsSimpleGroupByAggregation'")
+  }
+
+  override def preStepAndClose(
+      numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    closeOnExcept(udafAgg.preStep(numRows, args.map(_.getBase))) { preCols =>
+      try {
+        // Try the input types first. "zip" truncates, so extra outputs must not get here.
+        require(preCols.length <= args.length, "preStep returned more columns than inputs")
+        preCols.zip(args.map(_.dataType())).map { case (cudfCol, dt) =>
+          GpuColumnVector.fromChecked(cudfCol, dt)
+        }
+      } catch {
+        case _: IllegalArgumentException =>
+          // some transformation is made, so infer the types from the outputs
+          preCols.map { cudfCol =>
+            GpuColumnVector.fromChecked(cudfCol, AdvAggTypeUtils.infer(cudfCol))
+          }
+      }
+    }
+  }
+
+  override def reduce(numRows: Int, preStepData: Array[GpuColumnVector]): Array[GpuScalar] = {
+    closeOnExcept(udafAgg.reduce(numRows, preStepData.map(_.getBase))) { reducedRet =>
+      reducedRet.safeMap { cuScalar =>
+        GpuScalar(cuScalar, AdvAggTypeUtils.infer(cuScalar))
+      }
+    }
+  }
+
+  override def aggregateAdvanced(
+      keyOffsets: ColumnVector,
+      groupedData: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    // Should not come here, just in case
+    throw new UnsupportedOperationException("`RapidsAdvancedGroupByAggregation`" +
+      " is not supported yet")
+  }
+
+  override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+    udafAgg.asInstanceOf[RapidsSimpleGroupByAggregation].aggregate(inputIndices)
+  }
+
+  override def postStepAndClose(
+      numRows: Int,
+      aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    closeOnExcept(udafAgg.postStep(aggregatedData.map(_.getBase))) { postCols =>
+      require(postCols.length == inputAggBufferTypes.length,
+        "The sizes of the 'postStep' and 'aggregationBufferTypes' outputs does " +
+          s"not match. Sizes: ${postCols.length} vs ${inputAggBufferTypes.length}")
+      try {
+        postCols.zip(inputAggBufferTypes).map { case (cudfCol, dt) =>
+          GpuColumnVector.fromChecked(cudfCol, dt)
+        }
+      } catch {
+        case iae: IllegalArgumentException =>
+          throw new RuntimeException("The output of the 'postStep' does not match " +
+            "the given aggregate buffer types", iae)
+      }
+    }
+  }
+}
+
+object AdvAggTypeUtils {
+  /**
+   * Infer the Spark type from the given cuDF ColumnView.
+   *
+   * This returned Spark type can not be used to interact with the Spark world, but
+   * only for the GPU process internally when asking for a ColumnarBatch without given
+   * Spark type, because it may not always reflect the original Spark type. e.g.
+   *   A List of Struct column in cuDF may be either from MapType or the real List
+   *   of Struct type in Spark.
+   *   A INT32 column in cuDF may be from either YearMonthIntervalType or IntegerType
+   *   in Spark.
+   * The nullability of list elements and struct fields comes from the child null counts.
+   *
+   *  It is designed for the "preStep" and "reduce/aggregate" operations in our GPU
+   *  advanced aggregates.
+   */
+  def infer(col: ColumnView): DataType = col.getType match {
+    case DType.LIST => withResource(col.getChildColumnView(0)) { chdView =>
+      ArrayType(infer(chdView), chdView.getNullCount > 0) // nulls of elements, not rows
+    }
+    case DType.STRUCT =>
+      val fields = (0 until col.getNumChildren).map { i =>
+        withResource(col.getChildColumnView(i)) { chdView =>
+          val chdType = infer(chdView)
+          StructField(s"_cudf_${chdView.getType}_$i", chdType, chdView.getNullCount > 0)
+        }
+      }
+      StructType(fields)
+    case nonNested => fromNonNested(nonNested)
+  }
+
+  /**
+   * Infer the Spark type from the given cuDF Scalar, similar to infer(ColumnView).
+   */
+  def infer(scalar: Scalar): DataType = scalar.getType match {
+    case DType.LIST => withResource(scalar.getListAsColumnView) { elemView =>
+      ArrayType(infer(elemView), elemView.getNullCount > 0) // nulls of the list elements
+    }
+    case DType.STRUCT =>
+      val fields = withResource(scalar.getChildrenFromStructScalar) { childrenViews =>
+        childrenViews.zipWithIndex.map { case (chdView, i) =>
+          val chdType = infer(chdView)
+          StructField(s"_cudf_${chdView.getType}_$i", chdType, chdView.getNullCount > 0)
+        }
+      }
+      StructType(fields)
+    case nonNested => fromNonNested(nonNested)
+  }
+
+  private def fromNonNested(dType: DType): DataType = dType match {
+    case DType.BOOL8 => BooleanType
+    case DType.INT8 => ByteType
+    case DType.INT16 => ShortType
+    case DType.INT32 => IntegerType
+    case DType.INT64 => LongType
+    case DType.FLOAT32 => FloatType
+    case DType.FLOAT64 => DoubleType
+    case DType.TIMESTAMP_DAYS => DateType
+    case DType.TIMESTAMP_MICROSECONDS => TimestampType
+    case DType.STRING => StringType
+    case DType.UINT32 => GpuUnsignedIntegerType
+    case DType.UINT64 => GpuUnsignedLongType
+    case dType if dType.isDecimalType =>
+      val precision = dType.getTypeId match {
+        case DType.DTypeEnum.DECIMAL32 => 9
+        case DType.DTypeEnum.DECIMAL64 => 18
+        case DType.DTypeEnum.DECIMAL128 => 38
+        case _ => throw new IllegalArgumentException(s"Unsupported decimal type: $dType")
+      }
+      DecimalType(precision, -dType.getScale)
+    case _ => throw new IllegalArgumentException(s"Unsupported DType: $dType")
+  }
+
+  /**
+   * Extract the children columns from the given struct column. These columns
+   * should be closed when no longer needed.
+   * The behavior is undefined if a non-struct column is specified.
+   */
+  def extractChildren(structCol: GpuColumnVector): Array[GpuColumnVector] = {
+    val dt = structCol.dataType().asInstanceOf[StructType]
+    val baseCol = structCol.getBase
+    (0 until baseCol.getNumChildren).safeMap { i =>
+      withResource(baseCol.getChildColumnView(i)) { childView =>
+        GpuColumnVector.from(childView.copyToColumnVector(), dt(i).dataType)
+      }
+    }.toArray
+  }
+
+  /**
+   * Create an attribute of struct type from the given types.
+   */
+  def attrFromTypes(
+      name: String,
+      aggBufTypes: Array[DataType]): AttributeReference = {
+    val aggType = StructType(aggBufTypes.zipWithIndex.map { case (dt, id) =>
+      StructField(s"_${name}_child$id", dt)
+    })
+    AttributeReference(s"${name}_buf", aggType)()
+  }
+}
+
+/** Common implementation for all the types of GPU UDAF interface. */
+trait GpuUDAFFunctionBase extends GpuAdvancedAggregateFunction
+  with UserDefinedExpression {
+
+  /** User's UDAF instance */
+  protected def function: RapidsUDAF
+
+  protected lazy val aggBufferTypes: Array[DataType] = function.aggBufferTypes()
+
+  override def defaultValues: Array[GpuScalar] = {
+    closeOnExcept(function.getDefaultValue) { udafDefValues =>
+      require(udafDefValues.length == aggBufferTypes.length,
+        s"The default values number (${udafDefValues.length}) is NOT equal to " +
+          s"the aggregation buffers number(${aggBufferTypes.length})")
+      udafDefValues.zip(aggBufferTypes).map { case (scalar, dt) =>
+        GpuScalar(scalar, dt)
+      }
+    }
+  }
+
+  override def preProcessAndClose(
+      numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    closeOnExcept(function.preProcess(numRows, args.map(_.getBase))) { preCols =>
+      try {
+        // Try the input types first. "zip" truncates, so extra outputs must not get here.
+        require(preCols.length <= args.length, "preProcess returned more columns than inputs")
+        preCols.zip(args.map(_.dataType())).map { case (cudfCol, dt) =>
+          GpuColumnVector.fromChecked(cudfCol, dt)
+        }
+      } catch {
+        case _: IllegalArgumentException =>
+          // some transformation is made, so infer the types from the outputs
+          preCols.map { cudfCol =>
+            GpuColumnVector.fromChecked(cudfCol, AdvAggTypeUtils.infer(cudfCol))
+          }
+      }
+    }
+  }
+
+  override def postProcessAndClose(
+      numRows: Int,
+      args: Array[GpuColumnVector]): GpuColumnVector = {
+    closeOnExcept(function.postProcess(numRows, args.map(_.getBase), dataType)) { postCol =>
+      try {
+        GpuColumnVector.fromChecked(postCol, dataType)
+      } catch {
+        case iae: IllegalArgumentException =>
+          throw new RuntimeException("The output of the 'postProcess' does not match " +
+            "the UDAF result type", iae)
+      }
+    }
+  }
+
+  override def updateAggregate(): AdvancedCudfAggregate = {
+    new UDAFCudfAggregate(aggBufferTypes, function.updateAggregation())
+  }
+
+  override def mergeAggregate(): AdvancedCudfAggregate = {
+    // The merge stage will leverage the "preStepAndClose" method of the returned
+    // AdvancedCudfAggregate to pre-process the aggregate buffers being merged.
+    new UDAFCudfAggregate(aggBufferTypes, function.mergeAggregation())
+  }
+}
+
+case class GpuScalaUDAF(
+    function: RapidsUDAF,
+    dataType: DataType,
+    children: Seq[Expression],
+    udafName: Option[String],
+    nullable: Boolean) extends GpuUDAFFunctionBase {
+  // Prefer the user given name, otherwise use the simple class name of the UDAF.
+  override val name: String = udafName.getOrElse(function.getClass.getSimpleName)
+  // One attribute per aggregate buffer type, named as "<name>_<index>".
+  override lazy val aggBufferAttributes: Seq[AttributeReference] =
+    aggBufferTypes.indices.map { idx =>
+      AttributeReference(s"${name}_$idx", aggBufferTypes(idx))()
+    }
+}
+
+/**
+ * Works together with GpuTypedUDAFFunctionBase to unpack and repack the single
+ * struct aggregate buffer used by a TypedImperativeAggregate in Spark.
+ */
+private[aggregate] class TypeUDAFCudfAggregate(
+    aggBufferAttr: AttributeReference,
+    inputAggBufferTypes: Array[DataType],
+    udafAgg: RapidsUDAFGroupByAggregation
+) extends UDAFCudfAggregate(inputAggBufferTypes, udafAgg) {
+  override def preStepAndClose(numRows: Int,
+      args: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    val isSingleStruct = (args.length == 1) && args.head.dataType().isInstanceOf[StructType]
+    require(isSingleStruct, "preStep expects only one struct column as the input")
+    val bufferCols = withResource(args.head)(AdvAggTypeUtils.extractChildren)
+    super.preStepAndClose(numRows, bufferCols)
+  }
+
+  override def postStepAndClose(numRows: Int,
+      aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = {
+    withResource(super.postStepAndClose(numRows, aggregatedData)) { bufferCols =>
+      val structCol = ColumnVector.makeStruct(numRows.toLong, bufferCols.map(_.getBase): _*)
+      Array(GpuColumnVector.from(structCol, aggBufferAttr.dataType))
+    }
+  }
+}
+
+/**
+ * Aggregate function that packs all the UDAF aggregate buffers into a single struct
+ * type buffer, to match the Spark expectation for a TypedImperativeAggregate who is
+ * using a single aggregate buffer, e.g. ScalaAggregator and HiveUDAFFunction.
+ */
+trait GpuTypedUDAFFunctionBase extends GpuUDAFFunctionBase {
+
+  override lazy val aggBufferAttributes: Seq[AttributeReference] = {
+    // Spark expects a single buffer, so wrap the UDAF buffer types into one struct.
+    val structAttr = AdvAggTypeUtils.attrFromTypes(name, aggBufferTypes)
+    Seq(structAttr)
+  }
+
+  override def defaultValues: Array[GpuScalar] = {
+    val oneRowCols = withResource(super.defaultValues) { scalars =>
+      scalars.safeMap(gs => ColumnVector.fromScalar(gs.getBase, 1))
+    }
+    val bufferScalar = withResource(oneRowCols) { cols =>
+      Scalar.structFromColumnViews(cols: _*)
+    }
+    Array(GpuScalar(bufferScalar, aggBufferAttributes.head.dataType))
+  }
+
+  override def updateAggregate(): AdvancedCudfAggregate = {
+    val bufferAttr = aggBufferAttributes.head
+    new TypeUDAFCudfAggregate(bufferAttr, aggBufferTypes, function.updateAggregation())
+  }
+
+  override def mergeAggregate(): AdvancedCudfAggregate = {
+    val bufferAttr = aggBufferAttributes.head
+    new TypeUDAFCudfAggregate(bufferAttr, aggBufferTypes, function.mergeAggregation())
+  }
+
+  override def postProcessAndClose(numRows: Int,
+      args: Array[GpuColumnVector]): GpuColumnVector = {
+    val isSingleStruct = (args.length == 1) && args.head.dataType().isInstanceOf[StructType]
+    require(isSingleStruct, "postProcess expects only one struct column as the input")
+    val bufferCols = withResource(args.head)(AdvAggTypeUtils.extractChildren)
+    super.postProcessAndClose(numRows, bufferCols)
+  }
+}
+
+case class GpuScalaAggregator(
+    function: RapidsUDAF,
+    children: Seq[Expression],
+    dataType: DataType,
+    nullable: Boolean,
+    aggregatorName: Option[String]) extends GpuTypedUDAFFunctionBase {
+  // Prefer the user given name, otherwise use the simple class name of the aggregator.
+  override val name: String = aggregatorName.getOrElse(function.getClass.getSimpleName)
+}
+
+case class C2gUDAFBufferTransition(
+    child: Expression,
+    gpuType: DataType) extends CpuToGpuBufferTransition {
+  override val dataType: DataType = gpuType
+  // Types of the aggregate buffers wrapped by the GPU struct buffer.
+  private lazy val bufferTypes = gpuType.asInstanceOf[StructType].fields.map(_.dataType)
+  private lazy val unsafeRow = new UnsafeRow(bufferTypes.length)
+  private lazy val toSafeRow = SafeProjection.create(bufferTypes)
+
+  override protected def nullSafeEval(input: Any): InternalRow = {
+    val rowBytes = input.asInstanceOf[Array[Byte]]
+    unsafeRow.pointTo(rowBytes, rowBytes.length)
+    toSafeRow(unsafeRow)
+  }
+}
+
+case class G2cUDAFBufferTransition(child: Expression) extends GpuToCpuBufferTransition {
+  // Projects the children of the struct buffer into an UnsafeRow to get its bytes.
+  private lazy val toUnsafeRow = UnsafeProjection.create(
+    child.dataType.asInstanceOf[StructType].fields.map(_.dataType))
+  override protected def nullSafeEval(input: Any): Array[Byte] = {
+    val bufferRow = input.asInstanceOf[InternalRow]
+    toUnsafeRow(bufferRow).getBytes
+  }
+}
+
+object GpuUDAFMeta {
+  // Replacement rule for the Spark "ScalaUDAF". It can run on the GPU only when the
+  // user's UDAF also implements the "RapidsUDAF" interface.
+  def scalaUDAFMeta: ExprRule[ScalaUDAF] = GpuOverrides.expr[ScalaUDAF](
+    "User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS" +
+      " accelerated interface to get better performance.",
+    ExprChecks.reductionAndGroupByAgg(
+      GpuUserDefinedFunction.udfTypeSig,
+      TypeSig.all,
+      repeatingParamCheck =
+        Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))),
+    (cpuUdaf, conf, p, r) => new ImperativeAggExprMeta(cpuUdaf, conf, p, r) {
+      // Defined only when the user's UDAF also implements the RapidsUDAF interface.
+      private val gpuImpl = GpuScalaUDF.getRapidsUDFInstance[RapidsUDAF](cpuUdaf.udaf)
+
+      override def tagAggForGpu(): Unit = gpuImpl match {
+        case None =>
+          val implClass = cpuUdaf.udaf.getClass
+          willNotWorkOnGpu(s"${cpuUdaf.name} implemented by $implClass does not " +
+            s"provide a GPU implementation")
+        case Some(_) => // nothing to tag, a GPU implementation is available
+      }
+
+      override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+        require(gpuImpl.isDefined)
+        val rapidsUDAF = gpuImpl.get
+        GpuScalaUDAF(rapidsUDAF, cpuUdaf.dataType, childExprs, cpuUdaf.udafName,
+          cpuUdaf.nullable)
+      }
+    }
+  )
+
+  // Replacement rule for the Spark "ScalaAggregator". It can run on the GPU only when
+  // the user's Aggregator also implements the "RapidsUDAF" interface.
+  def scalaAggregatorMeta[IN, BUF, OUT]: ExprRule[ScalaAggregator[IN, BUF, OUT]] =
+    GpuOverrides.expr[ScalaAggregator[IN, BUF, OUT]](
+      "User Defined Aggregator, it can choose to implement a RAPIDS" +
+        " accelerated interface to get better performance.",
+      ExprChecks.reductionAndGroupByAgg(
+        GpuUserDefinedFunction.udfTypeSig,
+        TypeSig.all,
+        repeatingParamCheck =
+          Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))),
+      (cpuAgg, conf, p, r) => new TypedImperativeAggExprMeta(cpuAgg, conf, p, r) {
+        private val gpuImpl = GpuScalaUDF.getRapidsUDFInstance[RapidsUDAF](cpuAgg.agg)
+
+        override def tagAggForGpu(): Unit = gpuImpl match {
+          case None =>
+            val implClass = cpuAgg.agg.getClass
+            willNotWorkOnGpu(s"${cpuAgg.name} implemented by $implClass does not " +
+              s"provide a GPU implementation")
+          case Some(_) => // nothing to tag, a GPU implementation is available
+        }
+
+        override def aggBufferAttribute: AttributeReference = gpuImpl match {
+          case Some(rapidsUDAF) =>
+            // A single struct buffer that wraps all the buffers of the GPU implementation.
+            AdvAggTypeUtils.attrFromTypes(cpuAgg.name, rapidsUDAF.aggBufferTypes())
+          case None =>
+            // No GPU implementation, so it will fallback to CPU, use the CPU one.
+            cpuAgg.aggBufferAttributes.head
+        }
+
+        override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+          require(gpuImpl.isDefined)
+          val rapidsUDAF = gpuImpl.get
+          GpuScalaAggregator(rapidsUDAF, childExprs, cpuAgg.dataType, cpuAgg.nullable,
+            cpuAgg.aggregatorName)
+        }
+
+        override val supportBufferConversion: Boolean = true
+
+        override def createCpuToGpuBufferConverter(): CpuToGpuAggregateBufferConverter = {
+          (cpuBuffer: Expression) =>
+            C2gUDAFBufferTransition(cpuBuffer, aggBufferAttribute.dataType)
+        }
+
+        override def createGpuToCpuBufferConverter(): GpuToCpuAggregateBufferConverter = {
+          (gpuBuffer: Expression) => G2cUDAFBufferTransition(gpuBuffer)
+        }
+      }
+    )
+}
diff --git a/tests/src/test/resources/group_strings_ints_ints.csv b/tests/src/test/resources/group_strings_ints_ints.csv
new file mode 100644
index 00000000000..332cceb096c
--- /dev/null
+++ b/tests/src/test/resources/group_strings_ints_ints.csv
@@ -0,0 +1,50 @@
+"group1",1,141
+"group2",2,139
+"group3",3,138
+"group4",4,137
+"group5",5,136
+"group1",6,135
+"group2",7,134
+"group3",8,132
+"group4",9,131
+"group5",10,130
+"group1",11,129
+"group2",12,128
+"group3",13,127
+"group4",14,126
+"group5",15,125
+"group1",16,124
+"group2",17,123
+"group3",18,121
+"group4",19,119
+"group5",21,118
+"group1",22,117
+"group2",23,116
+"group3",23,115
+"group4",50,114
+"group5",49,113
+"group1",48,112
+"group2",47,112
+"group3",46,111
+"group4",45,110
+"group5",44,109
+"group1",43,108
+"group2",42,107
+"group3",41,106
+"group4",39,105
+"group5",38,104
+"group1",37,104
+"group2",36,104
+"group3",35,103
+"group4",34,102
+"group5",33,101
+"group1",32,199
+"group2",31,188
+"group3",30,177
+"group4",29,166
+"group5",28,155
+"group1",27,144
+"group2",26,133
+"group3",25,122
+"group4",24,111
+"group5",60,100
\ No newline at end of file
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala
index 82fb1dd4154..fe44bbde83c 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -71,7 +71,7 @@ class HashAggregateRetrySuite
 
     // attempt a cuDF reduction
     GpuAggregateIterator.aggregate(
-      aggHelper, input, mockMetrics)
+      aggHelper, input, Seq.empty, mockMetrics)._1
   }
 
   def makeGroupByAggHelper(forceMerge: Boolean): AggHelper = {
@@ -111,7 +111,8 @@ class HashAggregateRetrySuite
     GpuAggregateIterator.aggregate(
       makeGroupByAggHelper(forceMerge = false),
       input,
-      mockMetrics)
+      Seq.empty,
+      mockMetrics)._1
   }
 
   test("computeAndAggregate reduction with retry") {
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala
new file mode 100644
index 00000000000..206303c21c8
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+import ai.rapids.cudf.{ColumnVector, DType, GroupByAggregation, GroupByAggregationOnColumn, Scalar}
+import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation}
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{functions, Encoder, Encoders}
+import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
+
+class ScalaAggregatorSuite extends SparkQueryCompareTestSuite {
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Groupby with ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv, repart = 7) { df =>
+    // This is a basic smoke-test of the ScalaAggregator support on GPU, not an exhaustive
+    // test of the specific aggregator implementation itself.
+    df.createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText = """
+      SELECT count(c1_int), intAverage(c1_int), max(c2_int), intAverage(c2_int)
+      FROM groupby_scala_average_udaf_test_table
+      GROUP BY key_str
+    """)
+  }
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Reduction with ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv, repart = 7) { df =>
+    // Same smoke-test as the group-by one, but without any grouping key, so the
+    // aggregate runs as a reduction.
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText = """
+      SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+
+  private val emptyDfSchema = StructType(Seq(
+    StructField("key_str", StringType, nullable = true),
+    StructField("c1_int", IntegerType, nullable = true),
+    StructField("c2_int", IntegerType, nullable = true))
+  )
+
+  IGNORE_ORDER_testSparkResultsAreEqual(
+      testName = "Reduction with ScalaAggregator Average on empty dataset",
+      ss => emptyRowsDf(ss, emptyDfSchema)) { df =>
+    // Same reduction query as above, but over the DataFrame built by "emptyRowsDf" with
+    // the schema above (an empty dataset according to the test name).
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText =
+      """
+      SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+
+  Seq("partial", "final").foreach { replaceMode =>
+    val fallType = if (replaceMode == "partial") "Gpu2Cpu" else "Cpu2Gpu"
+    IGNORE_ORDER_ALLOW_NON_GPU_testSparkResultsAreEqual(
+      testName = s"Groupby with $fallType ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv,
+      repart = 7,
+      execsAllowedNonGpu = Seq("ObjectHashAggregateExec", "ProjectExec"),
+      conf = new SparkConf().set("spark.rapids.sql.hashAgg.replaceMode", replaceMode)
+    ) { df =>
+      // Same group-by smoke-test, run with "replaceMode" set and the CPU aggregate exec
+      // allowed; presumably this exercises the aggregate buffer conversion (see fallType).
+      df.createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+      df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+      df.sparkSession.sql(sqlText =
+        """
+        SELECT count(c1_int), intAverage(c1_int), max(c2_int), intAverage(c2_int)
+        FROM groupby_scala_average_udaf_test_table
+        GROUP BY key_str
+      """)
+    }
+  }
+}
+// Buffer of the CPU average: a nullable (boxed) running sum and the count of values.
+case class AverageBuffer(var sum: java.lang.Long, var count: Long)
+
+class IntAverageAggregator extends Aggregator[Integer, AverageBuffer, Integer] with RapidsUDAF {
+
+  // ===== CPU Spark Aggregator Implementation =====
+  // A zero value for this aggregation. Should satisfy the property that any b + zero = b
+  override def zero: AverageBuffer = AverageBuffer(null, 0L)
+
+  // Combine two values to produce a new value. For performance, the function may
+  // modify `buffer` and return it instead of constructing a new object
+  override def reduce(buffer: AverageBuffer, data: Integer): AverageBuffer = {
+    if (data != null) {
+      buffer.sum += data
+      buffer.count += 1
+    }
+    buffer
+  }
+
+  // Merge two intermediate values
+  override def merge(b1: AverageBuffer, b2: AverageBuffer): AverageBuffer = {
+    if (b2.sum != null) {
+      b1.sum += b2.sum
+    }
+    b1.count += b2.count
+    b1
+  }
+
+  // Transform the output of the reduction/aggregation
+  override def finish(reduction: AverageBuffer): Integer = {
+    // toInt is safe since no overflows here
+    if (reduction.count == 0) null else (reduction.sum / reduction.count).toInt
+  }
+
+  // Encoder for the intermediate buffer; AverageBuffer is a case class (a Product)
+  override def bufferEncoder: Encoder[AverageBuffer] = Encoders.product
+  // Encoder for the final Integer result
+  override def outputEncoder: Encoder[Integer] = Encoders.INT
+
+  // ===== GPU RapidsUDAF Implementation =====
+  override def getDefaultValue: Array[Scalar] = {
+    // Default values for the [sum, count] buffer. They need to match the output of
+    // "updateAggregation" and should ideally also match `zero` in the CPU version.
+    // closeOnExcept makes sure that the null scalar is not leaked if creating the
+    // second scalar throws an exception.
+    closeOnExcept(Scalar.fromNull(DType.INT64)) { nullSum =>
+      // sum: null (Long)
+      // count: 0 (Long)
+      val zeroCount = Scalar.fromLong(0L)
+      Array(nullSum, zeroCount)
+    }
+  }
+
+  override def preProcess(numRows: Int, args: Array[ColumnVector]): Array[ColumnVector] = {
+    require(args.length == 1)
+    // Cast int to long to avoid potential overflow; withResource closes the int input
+    val asLong = withResource(args(0)) { intCol => intCol.castTo(DType.INT64) }
+    Array(asLong)
+  }
+
+  override def postProcess(numRows: Int, args: Array[ColumnVector],
+      outType: DataType): ColumnVector = {
+    // Final step: the average is the element-wise division sum / count.
+    // Note that if the COUNT is 0 the SUM is null.
+    // Both input columns in "args" are closed once the quotient has been computed,
+    // to avoid a GPU memory leak.
+    val quotient = withResource(args) { buffers =>
+      val (sums, counts) = (buffers(0), buffers(1))
+      sums.div(counts)
+    }
+    // Cast to integers, no overflows here.
+    // The long quotient is only an intermediate column, so it is closed after the cast.
+    withResource(quotient) { longAvg =>
+      longAvg.castTo(DType.INT32)
+    }
+  }
+
+  override def aggBufferTypes(): Array[DataType] = Array(LongType, LongType) // [sum, count]
+
+  override def updateAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // For reduction (no group-by keys), compute SUM and COUNT directly
+        val inputCol = preStepData(0)
+        // Make sure that we don't leak if there is an exception
+        closeOnExcept(inputCol.sum()) { sum =>
+          val count = Scalar.fromLong(inputCol.getRowCount - inputCol.getNullCount)
+          Array(sum, count)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // For group-by aggregation, create SUM and COUNT operations
+        val col = inputIndices(0)
+        Array(GroupByAggregation.sum().onColumn(col), GroupByAggregation.count().onColumn(col))
+      }
+
+      override def postStep(aggregatedData: Array[ColumnVector]): Array[ColumnVector] = {
+        // cudf count() aggregate produces an integer column, so convert it to
+        // Long to match the agg buffer type.
+        require(aggregatedData.length == 2, "Expect two columns for postStep during update")
+        withResource(aggregatedData) { _ =>
+          // Cast before incRefCount: a failed cast would otherwise leak the extra reference.
+          closeOnExcept(aggregatedData(1).castTo(DType.INT64)) { countAsLong =>
+            Array(aggregatedData.head.incRefCount(), countAsLong)
+          }
+        }
+      }
+    }
+  }
+
+  override def mergeAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // Partial buffers are merged by adding up the partial sums and the partial counts
+        val partialSums = preStepData(0)
+        val partialCounts = preStepData(1)
+
+        // closeOnExcept keeps the merged sum from leaking if summing the counts throws
+        closeOnExcept(partialSums.sum()) { totalSum =>
+          val totalCount = partialCounts.sum()
+          Array(totalSum, totalCount)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // Both buffer columns are merged with a SUM aggregation:
+        // the partial sums add up to the sum, the partial counts add up to the count
+        val sumOfSums = GroupByAggregation.sum().onColumn(inputIndices(0))
+        val sumOfCounts = GroupByAggregation.sum().onColumn(inputIndices(1))
+        Array(sumOfSums, sumOfCounts)
+      }
+
+      // "postStep" uses default implementation (pass-through)
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala
new file mode 100644
index 00000000000..2ae24c0fd4e
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+import ai.rapids.cudf.{ColumnVector, DType, GroupByAggregation, GroupByAggregationOnColumn, Scalar}
+import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation}
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
+
+@scala.annotation.nowarn("msg= is deprecated")
+class ScalaUDAFSuite extends SparkQueryCompareTestSuite {
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Groupby with ScalaUDAF Average",
+      groupbyStringsIntsIntsFromCsv) { df =>
+    // Basic smoke test of the Scala UDAF framework with group-by keys, not an exhaustive
+    // test of this UDAF: it runs next to built-in aggregates and on an expression input.
+    // "repartition(7)" is to avoid the Complete mode of the aggregate.
+    df.repartition(7).createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText = """
+      SELECT count(c1_int), intAverage(c1_int), max(c2_int), count(c2_int),
+             intAverage(c2_int), intAverage(c2_int + 1)
+      FROM groupby_scala_average_udaf_test_table
+      GROUP BY key_str
+    """)
+  }
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Reduction with ScalaUDAF Average",
+      groupbyStringsIntsIntsFromCsv) { df =>
+    // Basic smoke test of the Scala UDAF framework without group-by keys (a reduction),
+    // not an exhaustive test of this UDAF. It also takes an expression as its input.
+    // "repartition(7)" is to avoid the Complete mode of the aggregate.
+    df.repartition(7).createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText = """
+      SELECT intAverage(c1_int), count(c1_int), max(c1_int), intAverage(c2_int),
+             intAverage(c2_int + 1), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+
+  private val emptyDfSchema = StructType(Seq( // same columns as groupbyStringsIntsIntsFromCsv
+    StructField("key_str", StringType, nullable = true),
+    StructField("c1_int", IntegerType, nullable = true),
+    StructField("c2_int", IntegerType, nullable = true))
+  )
+
+  IGNORE_ORDER_testSparkResultsAreEqual(
+    testName = "Reduction with ScalaUDAF Average on empty dataset",
+    ss => emptyRowsDf(ss, emptyDfSchema)) { df =>
+    // Basic smoke test of the Scala UDAF framework for a reduction over a dataset that
+    // has no rows; it is not an exhaustive test of the UDAF implementation itself.
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText =
+      """
+      SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+}
+
+@scala.annotation.nowarn("msg= is deprecated")
+class IntAverageUDAF extends UserDefinedAggregateFunction with RapidsUDAF {
+
+  // ===== CPU Spark UDAF Implementation =====
+  override def inputSchema: StructType = StructType(Seq(StructField("intValue", IntegerType)))
+
+  override def bufferSchema: StructType = StructType(Seq(
+    StructField("sum", LongType), // null until the first non-null input is seen
+    StructField("count", LongType) // number of non-null inputs
+  ))
+
+  override def dataType: DataType = IntegerType
+
+  override def deterministic: Boolean = true
+
+  override def initialize(buffer: MutableAggregationBuffer): Unit = {
+    buffer(0) = null // sum: stays null until a non-null value is seen
+    buffer(1) = 0L // count
+  }
+
+  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+    // Null inputs contribute to neither the sum nor the count
+    if (!input.isNullAt(0)) {
+      val value = input.getInt(0).toLong
+      // The sum stays null until the first non-null value arrives
+      val runningSum = if (buffer.isNullAt(0)) value else buffer.getLong(0) + value
+      buffer(0) = runningSum // sum
+      buffer(1) = buffer.getLong(1) + 1L // count
+    }
+  }
+
+  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+    // When the sum of buffer2 is null there is nothing to add and buffer1 already
+    // holds the correct sum, so only a non-null incoming sum is merged.
+    if (!buffer2.isNullAt(0)) {
+      val incoming = buffer2.getLong(0)
+      val merged = if (buffer1.isNullAt(0)) incoming else buffer1.getLong(0) + incoming
+      buffer1(0) = merged // sum
+    }
+    buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1) // count
+  }
+
+  override def evaluate(buffer: Row): Any = {
+    val rows = buffer.getLong(1)
+    // No rows => null; otherwise an average of Int values fits in an Int, so toInt is safe
+    if (rows != 0) (buffer.getLong(0) / rows).toInt else null
+  }
+
+  // ===== GPU RapidsUDAF Implementation =====
+  override def getDefaultValue: Array[Scalar] = {
+    // Default values for the [sum, count] buffer. They need to match the output of
+    // "updateAggregation" and should ideally also match the output of initialize in
+    // the CPU version. closeOnExcept makes sure that the null scalar is not leaked
+    // if creating the second scalar throws an exception.
+    closeOnExcept(Scalar.fromNull(DType.INT64)) { nullSum =>
+      // sum: null (Long)
+      // count: 0 (Long)
+      val zeroCount = Scalar.fromLong(0L)
+      Array(nullSum, zeroCount)
+    }
+  }
+
+  override def preProcess(numRows: Int, args: Array[ColumnVector]): Array[ColumnVector] = {
+    require(args.length == 1)
+    // Cast int to long to avoid potential overflow; withResource closes the int input
+    val asLong = withResource(args(0)) { intCol => intCol.castTo(DType.INT64) }
+    Array(asLong)
+  }
+
+  override def postProcess(numRows: Int, args: Array[ColumnVector],
+      outType: DataType): ColumnVector = {
+    // Final step: the average is the element-wise division sum / count.
+    // Note that if the COUNT is 0 the SUM is null.
+    // withResource closes both input columns once the quotient has been computed.
+    val quotient = withResource(args) { buffers =>
+      val (sums, counts) = (buffers(0), buffers(1))
+      sums.div(counts)
+    }
+    // Cast to integers, no overflows here.
+    // The long quotient is only an intermediate column, so it is closed after the cast.
+    withResource(quotient) { longAvg =>
+      longAvg.castTo(DType.INT32)
+    }
+  }
+
+  override def aggBufferTypes(): Array[DataType] = bufferSchema.fields.map(_.dataType)
+
+  override def updateAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // For reduction (no group-by keys), compute SUM and COUNT directly
+        val inputCol = preStepData(0)
+        // Make sure that we don't leak if there is an exception
+        closeOnExcept(inputCol.sum()) { sum =>
+          val count = Scalar.fromLong(inputCol.getRowCount - inputCol.getNullCount)
+          Array(sum, count)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // For group-by aggregation, create SUM and COUNT operations
+        val col = inputIndices(0)
+        Array(GroupByAggregation.sum().onColumn(col), GroupByAggregation.count().onColumn(col))
+      }
+
+      override def postStep(aggregatedData: Array[ColumnVector]): Array[ColumnVector] = {
+        // cudf count() aggregate produces an integer column, so convert it to Long
+        // to match the agg buffer type.
+        require(aggregatedData.length == 2, "Expect two columns for postStep during update")
+        withResource(aggregatedData) { _ =>
+          // Cast the count before taking the extra reference on the sum: if the cast threw
+          // after incRefCount, that reference would never be released.
+          closeOnExcept(aggregatedData(1).castTo(DType.INT64)) { countAsLong =>
+            Array(aggregatedData.head.incRefCount(), countAsLong) // sum, count
+          }
+        }
+      }
+    }
+  }
+
+  override def mergeAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // Partial buffers are merged by adding up the partial sums and the partial counts
+        val partialSums = preStepData(0)
+        val partialCounts = preStepData(1)
+
+        // closeOnExcept keeps the merged sum from leaking if summing the counts throws
+        closeOnExcept(partialSums.sum()) { totalSum =>
+          val totalCount = partialCounts.sum()
+          Array(totalSum, totalCount)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // Both buffer columns are merged with a SUM aggregation:
+        // the partial sums add up to the sum, the partial counts add up to the count
+        val sumOfSums = GroupByAggregation.sum().onColumn(inputIndices(0))
+        val sumOfCounts = GroupByAggregation.sum().onColumn(inputIndices(1))
+        Array(sumOfSums, sumOfCounts)
+      }
+
+      // "postStep" uses default implementation (pass-through)
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
index aaea8a36ee8..68c324e3734 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
@@ -1315,6 +1315,10 @@ trait SparkQueryCompareTestSuite extends AnyFunSuite with BeforeAndAfterAll {
     conf.set(RapidsConf.GPU_BATCH_SIZE_BYTES.key, batchSize.toString)
   }
 
+  def emptyRowsDf(session: SparkSession, schema: StructType): DataFrame = { // zero rows, 2 slices
+    session.createDataFrame(session.sparkContext.parallelize(Seq.empty[Row], 2), schema)
+  }
+
   def mixedDfWithBuckets(session: SparkSession): DataFrame = {
     import session.implicits._
     Seq[(java.lang.Integer, java.lang.Long, java.lang.Double, String, java.lang.Integer, String)](
@@ -2022,6 +2026,14 @@ trait SparkQueryCompareTestSuite extends AnyFunSuite with BeforeAndAfterAll {
     )))(_)
   }
 
+  def groupbyStringsIntsIntsFromCsv: SparkSession => DataFrame = {
+    val csvSchema = StructType(Array( // a string key and two int values, all nullable
+      StructField("key_str", StringType, nullable = true),
+      StructField("c1_int", IntegerType, nullable = true),
+      StructField("c2_int", IntegerType, nullable = true)))
+    fromCsvDf("group_strings_ints_ints.csv", csvSchema)(_)
+  }
+
   def singularDoubleDf(session: SparkSession): DataFrame = {
     import session.implicits._
     Seq(1.1).toDF("double")
diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
index fc397301fbb..16567ea55fb 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
@@ -41,7 +41,7 @@ class RapidsTestSettings extends BackendTestSettings {
     .exclude("collect functions should be able to cast to array type with no null values", ADJUST_UT("order of elements in the array is non-deterministic in collect"))
     .exclude("SPARK-17641: collect functions should not collect null values", ADJUST_UT("order of elements in the array is non-deterministic in collect"))
     .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", WONT_FIX_ISSUE("Codegen related UT, not applicable for GPU"))
-    .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801"), (getJavaMajorVersion() >= 17))
+    .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801"), (getJavaMajorVersion() >= 11))
   enableSuite[RapidsJsonExpressionsSuite]
     .exclude("from_json - invalid data", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10891"))
     .exclude("from_json - input=empty array, schema=struct, output=single row with null", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10907"))
diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/320/operatorsScore.csv
+++ b/tools/generated_files/320/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/320/supportedExprs.csv
+++ b/tools/generated_files/320/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/321/operatorsScore.csv b/tools/generated_files/321/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/321/operatorsScore.csv
+++ b/tools/generated_files/321/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/321/supportedExprs.csv b/tools/generated_files/321/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/321/supportedExprs.csv
+++ b/tools/generated_files/321/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/321cdh/operatorsScore.csv b/tools/generated_files/321cdh/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/321cdh/operatorsScore.csv
+++ b/tools/generated_files/321cdh/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/321cdh/supportedExprs.csv b/tools/generated_files/321cdh/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/321cdh/supportedExprs.csv
+++ b/tools/generated_files/321cdh/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/322/operatorsScore.csv b/tools/generated_files/322/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/322/operatorsScore.csv
+++ b/tools/generated_files/322/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/322/supportedExprs.csv b/tools/generated_files/322/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/322/supportedExprs.csv
+++ b/tools/generated_files/322/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/323/operatorsScore.csv b/tools/generated_files/323/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/323/operatorsScore.csv
+++ b/tools/generated_files/323/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/323/supportedExprs.csv b/tools/generated_files/323/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/323/supportedExprs.csv
+++ b/tools/generated_files/323/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/324/operatorsScore.csv b/tools/generated_files/324/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/324/operatorsScore.csv
+++ b/tools/generated_files/324/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/324/supportedExprs.csv b/tools/generated_files/324/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/324/supportedExprs.csv
+++ b/tools/generated_files/324/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv
index d408b9e042d..4b116751d42 100644
--- a/tools/generated_files/330/operatorsScore.csv
+++ b/tools/generated_files/330/operatorsScore.csv
@@ -145,6 +145,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -237,6 +238,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv
index c607288973e..26ed32b65a8 100644
--- a/tools/generated_files/330/supportedExprs.csv
+++ b/tools/generated_files/330/supportedExprs.csv
@@ -834,7 +834,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/330cdh/operatorsScore.csv b/tools/generated_files/330cdh/operatorsScore.csv
index d408b9e042d..4b116751d42 100644
--- a/tools/generated_files/330cdh/operatorsScore.csv
+++ b/tools/generated_files/330cdh/operatorsScore.csv
@@ -145,6 +145,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -237,6 +238,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/330cdh/supportedExprs.csv b/tools/generated_files/330cdh/supportedExprs.csv
index c607288973e..26ed32b65a8 100644
--- a/tools/generated_files/330cdh/supportedExprs.csv
+++ b/tools/generated_files/330cdh/supportedExprs.csv
@@ -834,7 +834,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/331/operatorsScore.csv b/tools/generated_files/331/operatorsScore.csv
index 99bfa557c4c..e014362cd1a 100644
--- a/tools/generated_files/331/operatorsScore.csv
+++ b/tools/generated_files/331/operatorsScore.csv
@@ -146,6 +146,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -238,6 +239,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/331/supportedExprs.csv b/tools/generated_files/331/supportedExprs.csv
index 5077b8a0500..a286681f0a0 100644
--- a/tools/generated_files/331/supportedExprs.csv
+++ b/tools/generated_files/331/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/332/operatorsScore.csv b/tools/generated_files/332/operatorsScore.csv
index 99bfa557c4c..e014362cd1a 100644
--- a/tools/generated_files/332/operatorsScore.csv
+++ b/tools/generated_files/332/operatorsScore.csv
@@ -146,6 +146,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -238,6 +239,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/332/supportedExprs.csv b/tools/generated_files/332/supportedExprs.csv
index 5077b8a0500..a286681f0a0 100644
--- a/tools/generated_files/332/supportedExprs.csv
+++ b/tools/generated_files/332/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/332cdh/operatorsScore.csv b/tools/generated_files/332cdh/operatorsScore.csv
index 99bfa557c4c..e014362cd1a 100644
--- a/tools/generated_files/332cdh/operatorsScore.csv
+++ b/tools/generated_files/332cdh/operatorsScore.csv
@@ -146,6 +146,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -238,6 +239,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/332cdh/supportedExprs.csv b/tools/generated_files/332cdh/supportedExprs.csv
index 5077b8a0500..a286681f0a0 100644
--- a/tools/generated_files/332cdh/supportedExprs.csv
+++ b/tools/generated_files/332cdh/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/333/operatorsScore.csv b/tools/generated_files/333/operatorsScore.csv
index 99bfa557c4c..e014362cd1a 100644
--- a/tools/generated_files/333/operatorsScore.csv
+++ b/tools/generated_files/333/operatorsScore.csv
@@ -146,6 +146,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -238,6 +239,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/333/supportedExprs.csv b/tools/generated_files/333/supportedExprs.csv
index 5077b8a0500..a286681f0a0 100644
--- a/tools/generated_files/333/supportedExprs.csv
+++ b/tools/generated_files/333/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/334/operatorsScore.csv b/tools/generated_files/334/operatorsScore.csv
index 99bfa557c4c..e014362cd1a 100644
--- a/tools/generated_files/334/operatorsScore.csv
+++ b/tools/generated_files/334/operatorsScore.csv
@@ -146,6 +146,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -238,6 +239,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/334/supportedExprs.csv b/tools/generated_files/334/supportedExprs.csv
index 5077b8a0500..a286681f0a0 100644
--- a/tools/generated_files/334/supportedExprs.csv
+++ b/tools/generated_files/334/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/340/operatorsScore.csv b/tools/generated_files/340/operatorsScore.csv
index c8ad2436f86..8d57d996800 100644
--- a/tools/generated_files/340/operatorsScore.csv
+++ b/tools/generated_files/340/operatorsScore.csv
@@ -147,6 +147,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -239,6 +240,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/340/supportedExprs.csv b/tools/generated_files/340/supportedExprs.csv
index 3f650da22a3..a1a6d420c46 100644
--- a/tools/generated_files/340/supportedExprs.csv
+++ b/tools/generated_files/340/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/341/operatorsScore.csv b/tools/generated_files/341/operatorsScore.csv
index c8ad2436f86..8d57d996800 100644
--- a/tools/generated_files/341/operatorsScore.csv
+++ b/tools/generated_files/341/operatorsScore.csv
@@ -147,6 +147,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -239,6 +240,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/341/supportedExprs.csv b/tools/generated_files/341/supportedExprs.csv
index 3f650da22a3..a1a6d420c46 100644
--- a/tools/generated_files/341/supportedExprs.csv
+++ b/tools/generated_files/341/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/342/operatorsScore.csv b/tools/generated_files/342/operatorsScore.csv
index c8ad2436f86..8d57d996800 100644
--- a/tools/generated_files/342/operatorsScore.csv
+++ b/tools/generated_files/342/operatorsScore.csv
@@ -147,6 +147,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -239,6 +240,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/342/supportedExprs.csv b/tools/generated_files/342/supportedExprs.csv
index 3f650da22a3..a1a6d420c46 100644
--- a/tools/generated_files/342/supportedExprs.csv
+++ b/tools/generated_files/342/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/343/operatorsScore.csv b/tools/generated_files/343/operatorsScore.csv
index c8ad2436f86..8d57d996800 100644
--- a/tools/generated_files/343/operatorsScore.csv
+++ b/tools/generated_files/343/operatorsScore.csv
@@ -147,6 +147,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -239,6 +240,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/343/supportedExprs.csv b/tools/generated_files/343/supportedExprs.csv
index 3f650da22a3..a1a6d420c46 100644
--- a/tools/generated_files/343/supportedExprs.csv
+++ b/tools/generated_files/343/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/344/operatorsScore.csv b/tools/generated_files/344/operatorsScore.csv
index c8ad2436f86..8d57d996800 100644
--- a/tools/generated_files/344/operatorsScore.csv
+++ b/tools/generated_files/344/operatorsScore.csv
@@ -147,6 +147,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -239,6 +240,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/344/supportedExprs.csv b/tools/generated_files/344/supportedExprs.csv
index 3f650da22a3..a1a6d420c46 100644
--- a/tools/generated_files/344/supportedExprs.csv
+++ b/tools/generated_files/344/supportedExprs.csv
@@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/350/operatorsScore.csv b/tools/generated_files/350/operatorsScore.csv
index 031830ad733..2de3aac1d29 100644
--- a/tools/generated_files/350/operatorsScore.csv
+++ b/tools/generated_files/350/operatorsScore.csv
@@ -151,6 +151,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -244,6 +245,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/350/supportedExprs.csv b/tools/generated_files/350/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/350/supportedExprs.csv
+++ b/tools/generated_files/350/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/351/operatorsScore.csv b/tools/generated_files/351/operatorsScore.csv
index 031830ad733..2de3aac1d29 100644
--- a/tools/generated_files/351/operatorsScore.csv
+++ b/tools/generated_files/351/operatorsScore.csv
@@ -151,6 +151,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -244,6 +245,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/351/supportedExprs.csv b/tools/generated_files/351/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/351/supportedExprs.csv
+++ b/tools/generated_files/351/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/352/operatorsScore.csv b/tools/generated_files/352/operatorsScore.csv
index fe689f780a9..36e50978ad8 100644
--- a/tools/generated_files/352/operatorsScore.csv
+++ b/tools/generated_files/352/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -245,6 +246,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/352/supportedExprs.csv b/tools/generated_files/352/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/352/supportedExprs.csv
+++ b/tools/generated_files/352/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/353/operatorsScore.csv b/tools/generated_files/353/operatorsScore.csv
index fe689f780a9..36e50978ad8 100644
--- a/tools/generated_files/353/operatorsScore.csv
+++ b/tools/generated_files/353/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -245,6 +246,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/353/supportedExprs.csv b/tools/generated_files/353/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/353/supportedExprs.csv
+++ b/tools/generated_files/353/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/354/operatorsScore.csv b/tools/generated_files/354/operatorsScore.csv
index fe689f780a9..36e50978ad8 100644
--- a/tools/generated_files/354/operatorsScore.csv
+++ b/tools/generated_files/354/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -245,6 +246,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/354/supportedExprs.csv b/tools/generated_files/354/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/354/supportedExprs.csv
+++ b/tools/generated_files/354/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/355/operatorsScore.csv b/tools/generated_files/355/operatorsScore.csv
index fe689f780a9..36e50978ad8 100644
--- a/tools/generated_files/355/operatorsScore.csv
+++ b/tools/generated_files/355/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -245,6 +246,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/355/supportedExprs.csv b/tools/generated_files/355/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/355/supportedExprs.csv
+++ b/tools/generated_files/355/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/356/operatorsScore.csv b/tools/generated_files/356/operatorsScore.csv
index fe689f780a9..36e50978ad8 100644
--- a/tools/generated_files/356/operatorsScore.csv
+++ b/tools/generated_files/356/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -245,6 +246,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/356/supportedExprs.csv b/tools/generated_files/356/supportedExprs.csv
index 99fe3750667..8d9c8ea8d49 100644
--- a/tools/generated_files/356/supportedExprs.csv
+++ b/tools/generated_files/356/supportedExprs.csv
@@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/400/operatorsScore.csv b/tools/generated_files/400/operatorsScore.csv
index fd239d29725..2d6ed65f4c8 100644
--- a/tools/generated_files/400/operatorsScore.csv
+++ b/tools/generated_files/400/operatorsScore.csv
@@ -152,6 +152,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -246,6 +247,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/400/supportedExprs.csv b/tools/generated_files/400/supportedExprs.csv
index 7861c579d8b..c3bc508ec68 100644
--- a/tools/generated_files/400/supportedExprs.csv
+++ b/tools/generated_files/400/supportedExprs.csv
@@ -846,7 +846,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/401/operatorsScore.csv b/tools/generated_files/401/operatorsScore.csv
index bbaa15c9e66..d7a45e08280 100644
--- a/tools/generated_files/401/operatorsScore.csv
+++ b/tools/generated_files/401/operatorsScore.csv
@@ -154,6 +154,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -248,6 +249,8 @@ Round,4
 RoundCeil,4
 RoundFloor,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/401/supportedExprs.csv b/tools/generated_files/401/supportedExprs.csv
index c22306819e3..7a24562a639 100644
--- a/tools/generated_files/401/supportedExprs.csv
+++ b/tools/generated_files/401/supportedExprs.csv
@@ -850,7 +850,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA
 InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
diff --git a/tools/generated_files/operatorsScore.csv b/tools/generated_files/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/operatorsScore.csv
+++ b/tools/generated_files/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/supportedExprs.csv b/tools/generated_files/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/supportedExprs.csv
+++ b/tools/generated_files/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
 HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS