diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md index 62134b14624..2fed38b0e74 100644 --- a/docs/additional-functionality/advanced_configs.md +++ b/docs/additional-functionality/advanced_configs.md @@ -435,8 +435,11 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.StaticInvoke| |StaticInvoke|true|The supported types are not deterministic since it's a dynamic expression| spark.rapids.sql.expression.NormalizeNaNAndZero| |Normalize NaN and zero|true|None| spark.rapids.sql.expression.ScalarSubquery| |Subquery that will return only one row and one column|true|None| +spark.rapids.sql.expression.ScalaAggregator| |User Defined Aggregator, it can choose to implement a RAPIDS accelerated interface to get better performance.|true|None| +spark.rapids.sql.expression.ScalaUDAF| |User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance.|true|None| spark.rapids.sql.expression.HiveGenericUDF| |Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance|true|None| spark.rapids.sql.expression.HiveSimpleUDF| |Hive UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance|true|None| +spark.rapids.sql.expression.HiveUDAFFunction| |Hive user defined aggregate function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance|true|None| ### Execution diff --git a/docs/supported_ops.md b/docs/supported_ops.md index e5878efe5d9..6fd59e312b2 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -22886,6 +22886,202 @@ are limited. NS +ScalaAggregator + +User Defined Aggregator, it can choose to implement a RAPIDS accelerated interface to get better performance. +None +aggregation +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +reduction +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +ScalaUDAF + +User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance. +None +aggregation +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +reduction +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + HiveGenericUDF Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance @@ -22987,6 +23183,132 @@ are limited. NS NS + +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + +HiveUDAFFunction + +Hive user defined aggregate function, the UDAF can choose to implement a RAPIDS accelerated interface to get better performance +None +aggregation +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +reduction +param +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + + +result +S +S +S +S +S +S +S +S +PS
UTC is only supported TZ for TIMESTAMP
+S +S +S +S +S +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types UDT, DAYTIME, YEARMONTH
+NS +NS +NS + ## Casting diff --git a/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java new file mode 100644 index 00000000000..750679181e9 --- /dev/null +++ b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/IntLongAverageHiveUDAF.java @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.tests.udf.hive; + +import java.util.ArrayList; + +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.DType; +import ai.rapids.cudf.GroupByAggregation; +import ai.rapids.cudf.GroupByAggregationOnColumn; +import ai.rapids.cudf.Scalar; +import com.nvidia.spark.RapidsSimpleGroupByAggregation; +import com.nvidia.spark.RapidsUDAF; + +import com.nvidia.spark.RapidsUDAFGroupByAggregation; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.spark.sql.types.DataType; + +import static org.apache.spark.sql.types.DataTypes.IntegerType; +import static org.apache.spark.sql.types.DataTypes.LongType; + +/** Used by hive_udaf_test */ +@SuppressWarnings("deprecation") +public class IntLongAverageHiveUDAF extends AbstractGenericUDAFResolver implements RapidsUDAF { + // ===== CPU Hive UDAF Implementation ===== + // Build an evaluator for the aggregation + @Override + public GenericUDAFEvaluator 
getEvaluator(TypeInfo[] args) throws SemanticException { + if (args.length != 1) { + throw new HadoopIllegalArgumentException("Exactly one argument is expected."); + } + PrimitiveObjectInspector.PrimitiveCategory inType = + ((PrimitiveTypeInfo) args[0]).getPrimitiveCategory(); + if (inType == PrimitiveObjectInspector.PrimitiveCategory.LONG || + inType == PrimitiveObjectInspector.PrimitiveCategory.INT) { + boolean isInt = inType == PrimitiveObjectInspector.PrimitiveCategory.INT; + return new UDAFAverageEvaluatorLong(isInt); + } + throw new HadoopIllegalArgumentException("Only support 'long' or 'int' as input"); + } + + class AverageAggBuf extends GenericUDAFEvaluator.AbstractAggregationBuffer { + private long sum; + private long count; + } + + @SuppressWarnings("deprecation") + class UDAFAverageEvaluatorLong extends GenericUDAFEvaluator { + private final boolean isInt; + + UDAFAverageEvaluatorLong(boolean isInt) { + this.isInt = isInt; + } + + transient private PrimitiveObjectInspector inputOI; + transient private StructObjectInspector tempOI; + + transient private StructField countField; + transient private StructField sumField; + + transient private LongObjectInspector countFieldOI; + transient private LongObjectInspector sumFieldOI; + + transient private Object[] partialRet; + + @Override + public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException { + super.init(mode, parameters); + assert (parameters.length == 1); + + partialRet = new Object[2]; + partialRet[0] = new LongWritable(0); + partialRet[1] = new LongWritable(0); + // for the input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + } else { + tempOI = (StructObjectInspector) parameters[0]; + sumField = tempOI.getStructFieldRef("sum"); + countField = tempOI.getStructFieldRef("count"); + countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector(); + sumFieldOI = (LongObjectInspector) 
sumField.getFieldObjectInspector(); + } + + // for the output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + // The output of a partial aggregation is a struct containing + // a "long" count and a "long" sum. + // The field order is [sum, count], matching terminatePartial. + ArrayList foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + ArrayList fnames = new ArrayList(); + fnames.add("sum"); + fnames.add("count"); + return ObjectInspectorFactory.getStandardStructObjectInspector(fnames, foi); + } else { + if (isInt) { + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } else { + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() { + return new AverageAggBuf(); + } + + @Override + public void iterate(AggregationBuffer aggBuffer, Object[] parameters) throws HiveException { + assert (parameters.length == 1); + Object obj = parameters[0]; + if (obj != null) { + AverageAggBuf buf = (AverageAggBuf) aggBuffer; + buf.count += 1; + if (isInt) { + buf.sum += PrimitiveObjectInspectorUtils.getInt(obj, inputOI); + } else { + buf.sum += PrimitiveObjectInspectorUtils.getLong(obj, inputOI); + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer aggBuffer) throws HiveException { + AverageAggBuf buf = (AverageAggBuf) aggBuffer; + ((LongWritable) partialRet[0]).set(buf.sum); + ((LongWritable) partialRet[1]).set(buf.count); + return partialRet; + } + + @Override + public void merge(AggregationBuffer aggBuffer, Object partial) throws HiveException { + if (partial != null) { + AverageAggBuf buf = (AverageAggBuf) aggBuffer; + long count = countFieldOI.get(tempOI.getStructFieldData(partial, countField)); + buf.count += count; + Object sumValue = tempOI.getStructFieldData(partial, sumField); + if (sumValue != null) { + buf.sum += sumFieldOI.get(sumValue); 
+ } + } + } + + @Override + public Object terminate(AggregationBuffer aggBuffer) throws HiveException { + AverageAggBuf buf = (AverageAggBuf) aggBuffer; + if (buf.count == 0) { + return null; + } else { + if (isInt) { + IntWritable result = new IntWritable(0); + result.set((int)(buf.sum/buf.count)); + return result; + } else { + LongWritable result = new LongWritable(0); + result.set(buf.sum/buf.count); + return result; + } + } + } + + @Override + public void reset(AggregationBuffer aggBuffer) throws HiveException { + AverageAggBuf buf = (AverageAggBuf) aggBuffer; + buf.count = 0; + buf.sum = 0; + } + } // end of UDAFAverageEvaluatorLong + + // ===== GPU RapidsUDAF Implementation ===== + @Override + public Scalar[] getDefaultValue() { + // Return default values for [sum, count] - these need to match the + // output of updateAggregation and also ideally match the output of + // initialize in the CPU Hive version. + Scalar sum = Scalar.fromNull(DType.INT64); + try { + Scalar count = Scalar.fromLong(0L); + return new Scalar[]{sum, count}; + } catch (Exception e) { + // Make sure 'sum' is closed if any exceptions after being created, to avoid + // GPU memory leak. 
+ sum.close(); + throw e; + } + } + + @Override + public ColumnVector[] preProcess(int numRows, ColumnVector[] args) { + try (ColumnVector inputInt = args[0]) { + if (args.length != 1) { + throw new IllegalArgumentException("Expect only one column for preProcess."); + } + return new ColumnVector[] {inputInt.castTo(DType.INT64)}; + } + } + + @Override + public ColumnVector postProcess(int numRows, ColumnVector[] args, DataType outType) { + ColumnVector ret = null; + // Final step: divide sum by count to get average + try (ColumnVector sumCol = args[0]; + ColumnVector countCol = args[1]) { + if (args.length != 2) { + throw new IllegalArgumentException("Expect twos column for postProcess."); + } + ret = sumCol.div(countCol); + } + if (IntegerType.equals(outType)) { + try (ColumnVector longRet = ret) { + return longRet.castTo(DType.INT32); + } + } else { + return ret; + } + } + + @Override + public RapidsUDAFGroupByAggregation updateAggregation() { + return new RapidsSimpleGroupByAggregation() { + // "preStep" uses the default implementation (pass-through) + + @Override + public Scalar[] reduce(int numRows, ColumnVector[] preStepData) { + if (preStepData.length != 1) { + throw new IllegalArgumentException("Expect only one column for update reduce."); + } + // For reduction (no group-by keys), compute SUM and COUNT directly + ColumnVector inCol = preStepData[0]; + Scalar sum = inCol.sum(); + try { + Scalar count = Scalar.fromLong(inCol.getRowCount() - inCol.getNullCount()); + return new Scalar[]{sum, count}; + } catch (Exception e) { + // Make sure that we don't leak if there is an exception. 
+ sum.close(); + throw e; + } + } + + @Override + public GroupByAggregationOnColumn[] aggregate(int[] inputIndices) { + if (inputIndices.length != 1) { + throw new IllegalArgumentException("Expect only one column for update aggregate."); + } + // For group-by aggregation, create SUM and COUNT operations + int colIndex = inputIndices[0]; + return new GroupByAggregationOnColumn[]{ + GroupByAggregation.sum().onColumn(colIndex), + GroupByAggregation.count().onColumn(colIndex) + }; + } + + @Override + public ColumnVector[] postStep(ColumnVector[] aggregatedData) { + // cudf count() aggregate produces an integer column, so convert them + // both to Long to match the agg buffer type. + assert (aggregatedData.length == 2); + try (ColumnVector sumLong = aggregatedData[0]; + ColumnVector countMaybeInt = aggregatedData[1]) { + ColumnVector countAsLong = countMaybeInt.castTo(DType.INT64); + return new ColumnVector[] {sumLong.incRefCount(), countAsLong}; + } + } + }; + } + + @Override + public RapidsUDAFGroupByAggregation mergeAggregation() { + return new RapidsSimpleGroupByAggregation() { + // "preStep" uses the default implementation (pass-through) + + @Override + public Scalar[] reduce(int numRows, ColumnVector[] preStepData) { + if (preStepData.length != 2) { + throw new IllegalArgumentException("Expect twos column for merge reduce."); + } + ColumnVector sumCol = preStepData[0]; + ColumnVector countCol = preStepData[1]; + Scalar sum = sumCol.sum(); + try { + Scalar count = countCol.sum(); + return new Scalar[]{sum, count}; + } catch (Exception e) { + // Make sure that we don't leak if there is an exception. 
+ sum.close(); + throw e; + } + } + + @Override + public GroupByAggregationOnColumn[] aggregate(int[] inputIndices) { + if (inputIndices.length != 2) { + throw new IllegalArgumentException("Expect twos column for merge aggregate."); + } + return new GroupByAggregationOnColumn[]{ + GroupByAggregation.sum().onColumn(inputIndices[0]), // sum of sums + GroupByAggregation.sum().onColumn(inputIndices[1]) // sum of counts + }; + } + + // "postStep" uses the default implementation (pass-through) + }; + } + + @Override + public DataType[] aggBufferTypes() { + return new DataType[]{LongType, LongType}; + } +} + diff --git a/integration_tests/src/main/python/hive_udaf_test.py b/integration_tests/src/main/python/hive_udaf_test.py new file mode 100644 index 00000000000..2edba72ecb5 --- /dev/null +++ b/integration_tests/src/main/python/hive_udaf_test.py @@ -0,0 +1,77 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from asserts import assert_gpu_and_cpu_are_equal_sql +from data_gen import gen_df, IntegerGen, int_gen, long_gen, idfn +from spark_session import with_spark_session, is_databricks_runtime +from hive_udf_utils import * +from marks import ignore_order, allow_non_gpu + +projected_aggs_list = [ + "average_agg(i), average_agg(l)", + "average_agg(i), max(i), average_agg(l), min(l)", + "max(i), average_agg(i), min(l), average_agg(l)", + "max(i), min(l), average_agg(i), average_agg(l)", + "average_agg(i), max(i), min(l), average_agg(l)", + "max(i), average_agg(i), average_agg(l), min(l)", + "average_agg(i), average_agg(l), max(i), min(l)" +] + +def hive_udaf_eval_fn(spark, data_gens): + load_hive_udf(spark, "average_agg", + "com.nvidia.spark.rapids.tests.udf.hive.IntLongAverageHiveUDAF") + return gen_df(spark, data_gens) + + +@ignore_order(local=True) +@pytest.mark.parametrize("aggs", projected_aggs_list, ids=idfn) +def test_groupby_with_hive_average_udaf(aggs): + with_spark_session(skip_if_no_hive) + # 'g' is the group key column, so at most 52 groups (include nulls) + data_gens = [["g", IntegerGen(min_val=0, max_val=50)], ["i", int_gen], ["l", long_gen]] + assert_gpu_and_cpu_are_equal_sql( + lambda spark: hive_udaf_eval_fn(spark, data_gens), + "groupby_hive_udaf_table", + "SELECT g, {} FROM groupby_hive_udaf_table GROUP BY g".format(aggs), + conf={"spark.sql.catalogImplementation": "hive"}) + + +@ignore_order(local=True) +@pytest.mark.parametrize("aggs", projected_aggs_list, ids=idfn) +def test_reduction_with_hive_average_udaf(aggs): + with_spark_session(skip_if_no_hive) + assert_gpu_and_cpu_are_equal_sql( + lambda spark: hive_udaf_eval_fn(spark, [["i", int_gen], ["l", long_gen]]), + "reduction_hive_udaf_table", + "SELECT {} FROM reduction_hive_udaf_table".format(aggs), + conf={"spark.sql.catalogImplementation": "hive"}) + + +@ignore_order(local=True) +@pytest.mark.skipif(is_databricks_runtime(), reason="Databricks does not support mixed aggs") 
+@allow_non_gpu("ObjectHashAggregateExec", "ProjectExec") +@pytest.mark.parametrize("aggs", projected_aggs_list[0:2], ids=idfn) +@pytest.mark.parametrize("repl_mode", ["partial", "final"], ids=idfn) +def test_groupby_with_mixed_hive_average_udaf(aggs, repl_mode): + with_spark_session(skip_if_no_hive) + # 'g' is the group key column, so at most 52 groups (include nulls) + data_gens = [["g", IntegerGen(min_val=0, max_val=50)], ["i", int_gen], ["l", long_gen]] + assert_gpu_and_cpu_are_equal_sql( + lambda spark: hive_udaf_eval_fn(spark, data_gens), + "groupby_hive_udaf_table", + "SELECT g, {} FROM groupby_hive_udaf_table GROUP BY g".format(aggs), + conf={"spark.sql.catalogImplementation": "hive", + "spark.rapids.sql.hashAgg.replaceMode": repl_mode}) diff --git a/integration_tests/src/main/python/hive_udf_utils.py b/integration_tests/src/main/python/hive_udf_utils.py new file mode 100644 index 00000000000..ae3e863988e --- /dev/null +++ b/integration_tests/src/main/python/hive_udf_utils.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from conftest import skip_unless_precommit_tests + + +def drop_udf(spark, udf_name): + spark.sql(f"DROP TEMPORARY FUNCTION IF EXISTS `{udf_name}`") + + +def skip_if_no_hive(spark): + if spark.conf.get("spark.sql.catalogImplementation") != "hive": + skip_unless_precommit_tests('The Spark session does not have Hive support') + + +def load_hive_udf(spark, udf_name, udf_class): + drop_udf(spark, udf_name) + # if UDF failed to load, throws AnalysisException, check if the udf class is in the class path + spark.sql(f"CREATE TEMPORARY FUNCTION `{udf_name}` AS '{udf_class}'") diff --git a/integration_tests/src/main/python/row-based_udf_test.py b/integration_tests/src/main/python/row-based_udf_test.py index e849a87b10e..d2a3017273d 100644 --- a/integration_tests/src/main/python/row-based_udf_test.py +++ b/integration_tests/src/main/python/row-based_udf_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,19 +17,8 @@ from asserts import assert_gpu_and_cpu_are_equal_sql from data_gen import * from spark_session import with_spark_session, is_spark_350_or_later -from conftest import skip_unless_precommit_tests +from hive_udf_utils import * -def drop_udf(spark, udfname): - spark.sql("DROP TEMPORARY FUNCTION IF EXISTS {}".format(udfname)) - -def skip_if_no_hive(spark): - if spark.conf.get("spark.sql.catalogImplementation") != "hive": - skip_unless_precommit_tests('The Spark session does not have Hive support') - -def load_hive_udf(spark, udfname, udfclass): - drop_udf(spark, udfname) - # if UDF failed to load, throws AnalysisException, check if the udf class is in the class path - spark.sql("CREATE TEMPORARY FUNCTION {} AS '{}'".format(udfname, udfclass)) @pytest.mark.xfail(condition=is_spark_350_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/9064') diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java new file mode 100644 index 00000000000..3b5aa06742a --- /dev/null +++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsSimpleGroupByAggregation.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark; + +import ai.rapids.cudf.GroupByAggregationOnColumn; + +/** + * Standard CUDF-based aggregation step that uses built-in CUDF aggregation + * operations. This handles the most common aggregation patterns and provides + * the best performance. + */ +public interface RapidsSimpleGroupByAggregation extends RapidsUDAFGroupByAggregation { + /** + * The main aggregation step that uses built-in CUDF GroupBy operations. + * + * @param inputIndices An array of ints, which are the indices of the input + * columns. + * @return An array of CUDF `GroupByAggregationOnColumn` instances. + */ + GroupByAggregationOnColumn[] aggregate(int[] inputIndices); +} diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java new file mode 100644 index 00000000000..885e621ab7f --- /dev/null +++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAF.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark; + +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.Scalar; +import org.apache.spark.sql.types.DataType; + +/** + * An interface for a GPU-accelerated User Defined Aggregate Function (UDAF). + * This provides the necessary methods to perform distributed group-by and + * reduction aggregations using CUDF. 
+ */ +public interface RapidsUDAF { + + /** + * Provides an array of default values for the aggregation result. This is + * used when a reduction aggregation does not have any rows to aggregate. + *
+ * Rapids will close these Scalars after being converted to columns. + *
+ * @return An array of cudf Scalar representing the output of the + * updateAggregation stage of processing. The output of this + * may still be merged with other tasks. + */ + Scalar[] getDefaultValue(); + + /** + * A pre-processing step that transforms the input ColumnVector arguments. + * This method is similar to a regular RapidsUDF but returns an array of + * ColumnVectors. By default, this is a no-op and will just return the + * arguments passed in. + *
+ * Users should close the input columns to avoid GPU memory leak, while the + * returned columns will be closed by the Rapids automatically. + * + * @param numRows The number of rows to process. This is for cases + * like a `COUNT(*)`, where there may be no arguments to a UDAF. + * This is not common. + * @param args An array of ColumnVector arguments. + * @return An array of ColumnVectors representing the pre-processed data. + */ + default ColumnVector[] preProcess(int numRows, ColumnVector[] args) { + return args; + } + + /** + * This method returns a RapidsUDAFGroupByAggregation that defines the + * logic for the initial aggregation. The preProcess method will be called + * first, and its output will then be processed by the + * RapidsUDAFGroupByAggregation that this method returns. + *
+ * @return A RapidsUDAFGroupByAggregation that defines the aggregation + * logic. + */ + RapidsUDAFGroupByAggregation updateAggregation(); + + /** + * This method returns a RapidsUDAFGroupByAggregation that defines how to + * merge two sets of aggregation results. This is used in distributed + * aggregation scenarios where intermediate results from different + * partitions are combined. + *
+ * @return A RapidsUDAFGroupByAggregation that defines the merge logic. + */ + RapidsUDAFGroupByAggregation mergeAggregation(); + + /** + * A post-processing step that takes the result of the final aggregation + * and performs any necessary transformations before returning the final + * result. This method returns a single ColumnVector, which is the final + * result of the aggregation. + *
+ * Users should close the input columns to avoid GPU memory leak. But the + * returned column will be closed by the Rapids automatically. + *
+ * @param numRows The number of rows in the aggregated data. + * @param args An array of ColumnVector arguments from the final aggregation step. + * @param outType The final data type of this UDAF + * @return A single ColumnVector representing the final UDAF result. + */ + ColumnVector postProcess(int numRows, ColumnVector[] args, DataType outType); + + /** + * Data types of the aggregate buffer. + *
+ * It is better to align with the "bufferSchema" of "UserDefinedAggregateFunction", or + * the "bufferEncoder" of "Aggregator" in Spark. Otherwise, data corruption is likely + * to happen when some operations of this aggregation fall back to CPU. E.g. partial + * aggregates run on CPU but final aggregates run on GPU, or vice-versa. This is rare + * but just in case. + */ + DataType[] aggBufferTypes(); +} diff --git a/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java new file mode 100644 index 00000000000..8ca441b65af --- /dev/null +++ b/sql-plugin-api/src/main/java/com/nvidia/spark/RapidsUDAFGroupByAggregation.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark; + +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.Scalar; + +/** + * Base interface for GPU-accelerated UDAF aggregation implementations. This provides + * the contract for different aggregation strategies. + *

+ * Please do not try to extend this interface directly. + * `RapidsSimpleGroupByAggregation` is the interface currently supported for direct + * implementation. More may be added in the future. + */ +public interface RapidsUDAFGroupByAggregation { + /** + * An optional pre-step for the aggregation. By default, this is a no-op + * and will just return the arguments passed in. + *
+ * Users should close the input columns to avoid GPU memory leaks, but the + * returned columns will be closed by Rapids automatically. + *
+ * @param numRows The number of rows. + * @param args An array of input ColumnVectors. + * @return An array of ColumnVectors. + */ + default ColumnVector[] preStep(int numRows, ColumnVector[] args) { + return args; + } + + /** + * Performs a reduction on the pre-step output (no keys). The + * output of this will be turned into a ColumnVector and possibly + * combined with other rows before being processed more. + *
+ * Rapids will close both the input columns and returned Scalars automatically. + * + * @param numRows The number of rows to process. + * @param preStepData The output from the preStep method. + * @return An array of cudf Scalars representing the reduced data. + */ + Scalar[] reduce(int numRows, ColumnVector[] preStepData); + + /** + * A post-process step for the aggregation. It takes the output of the + * aggregations and performs any processing needed to make it match the + * input to the merge aggregation. + *
+ * Users should close the input columns to avoid GPU memory leaks, but the + * returned columns will be closed by Rapids automatically. + * + * @param aggregatedData The output from the aggregation step. They should be + * closed when no longer needed. + * @return An array of ColumnVectors compatible with the merge step. + */ + default ColumnVector[] postStep(ColumnVector[] aggregatedData) { + return aggregatedData; + } +} diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java index edb22a0082f..7b9cc76c9a4 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java @@ -848,6 +848,8 @@ public static GpuColumnVector from(Scalar scalar, int count, DataType sparkType) /** * Creates a GpuColumnVector from a GpuScalar + * This is not recommended and will be deprecated in the future, use + * "from(GpuScalar scalar, int count)" instead. * * @param scalar the input GpuScalar * @param count the row number of the output column @@ -855,7 +857,23 @@ public static GpuColumnVector from(Scalar scalar, int count, DataType sparkType) * @return a GpuColumnVector. It should be closed to avoid memory leak. */ public static GpuColumnVector from(GpuScalar scalar, int count, DataType sparkType) { - return from(ai.rapids.cudf.ColumnVector.fromScalar(scalar.getBase(), count), sparkType); + if (!scalar.dataType().equals(sparkType)) { // value equality, not reference identity + throw new IllegalArgumentException("The given spark type(" + sparkType + + ") does not match the GpuScalar type(" + scalar.dataType() + ")."); + } + return from(scalar, count); + } + + /** + * Creates a GpuColumnVector from a GpuScalar + * + * @param scalar the input GpuScalar + * @param count the row number of the output column + * @return a GpuColumnVector. It should be closed to avoid memory leak. 
+ */ + public static GpuColumnVector from(GpuScalar scalar, int count) { + return from(ai.rapids.cudf.ColumnVector.fromScalar(scalar.getBase(), count), + scalar.dataType()); } /** @@ -1002,6 +1020,27 @@ public static ColumnarBatch dropColumns(ColumnarBatch cb, boolean[] dropList) { return incRefCounts(ret); } + /** + * Slice the columns from the given columnar batch at the range of [start, end). + * 'start' should be in the range of '[0, numColumns]', and 'end' >= 'start'. + * Any invalid start or end will lead to an exception. + * 'start == numColumns' or 'start' == 'end' will return a batch with no columns. + */ + public static ColumnarBatch sliceColumns(ColumnarBatch cb, int start, int end) { + int numColumns = cb.numCols(); + if (0 <= start && start <= numColumns && start <= end) { + int numRows = cb.numRows(); + int realEnd = Math.min(end, numColumns); + ArrayList columns = new ArrayList<>(); + for (int i = start; i < realEnd; i++) { + columns.add(cb.column(i)); + } + ColumnarBatch ret = new ColumnarBatch(columns.toArray(new ColumnVector[0]), numRows); + return incRefCounts(ret); + } + throw new IllegalArgumentException("Invalid 'start' or 'end'"); + } + /** * Get the underlying Spark compatible columns from the batch. 
This does not increment any * reference counts so if you want to use these columns after the batch is closed diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala index 358a869033d..9ad6ce3cea9 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.{ExplainUtils, SortExec, SparkPlan} import org.apache.spark.sql.execution.aggregate.{BaseAggregateExec, HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.aggregate.{CpuToGpuAggregateBufferConverter, CudfAggregate, GpuAggregateExpression, GpuToCpuAggregateBufferConverter} +import org.apache.spark.sql.rapids.aggregate.{AdvAggTypeUtils, AdvancedCudfAggregate, CpuToGpuAggregateBufferConverter, CudfAggregate, GpuAdvancedAggregateFunction, GpuAggregateExpression, GpuToCpuAggregateBufferConverter} import org.apache.spark.sql.rapids.execution.{GpuBatchSubPartitioner, GpuShuffleMeta, TrampolineUtil} import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnarBatch @@ -305,6 +305,9 @@ object AggregateUtils extends Logging { repartitionHappened } + + type AdvancedStep = (Int, Array[GpuColumnVector]) => Array[GpuColumnVector] + type AdvancedAggHandler = (AdvancedStep, Int, Int) } /** Utility class to hold all of the metrics related to hash aggregation */ @@ -398,6 +401,10 @@ class AggHelper( private val postStep = new mutable.ArrayBuffer[Expression]() private val postStepAttr = new mutable.ArrayBuffer[Attribute]() + private val advPreStepArgs = new mutable.ArrayBuffer[Seq[Expression]]() + private val advPreSteps = new mutable.ArrayBuffer[AggregateUtils.AdvancedStep]() + private val 
advCudfAggregates = new mutable.ArrayBuffer[(AdvancedCudfAggregate, Int)]() + // we add the grouping expression first, which should bind as pass-through if (forceMerge) { // a grouping expression can do actual computation, but we cannot do that computation again @@ -409,91 +416,181 @@ class AggHelper( } postStep ++= groupingAttributes postStepAttr ++= groupingAttributes - postStepDataTypes ++= - groupingExpressions.map(_.dataType) + postStepDataTypes ++= groupingExpressions.map(_.dataType) + + private def addAdvancedAgg(advAgg: AdvancedCudfAggregate, + aggOrd: Int, + advPreStep: AggregateUtils.AdvancedStep, + advPreArgs: Seq[Expression]): Unit = { + // pre-process is done in two steps for an advanced agg: + // 1) pre-project by "preArgs" to get the arguments for the advPreStep, + // 2) call the given "advPreStep" function + advPreStepArgs += advPreArgs + advPreSteps += advPreStep + advCudfAggregates += ((advAgg, aggOrd)) + } private var ix = groupingAttributes.length - for (aggExp <- aggregateExpressions) { + aggregateExpressions.zipWithIndex.foreach { case (aggExp, ord) => val aggFn = aggExp.aggregateFunction if ((aggExp.mode == Partial || aggExp.mode == Complete) && !forceMerge) { - val ordinals = (ix until ix + aggFn.updateAggregates.length) - aggOrdinals ++= ordinals - ix += ordinals.length - val updateAggs = aggFn.updateAggregates - postStepDataTypes ++= updateAggs.map(_.dataType) - cudfAggregates ++= updateAggs - preStep ++= aggFn.inputProjection - postStep ++= aggFn.postUpdate - postStepAttr ++= aggFn.postUpdateAttr + aggFn match { + case advFn: GpuAdvancedAggregateFunction => + addAdvancedAgg(advFn.updateAggregate(), ord, advFn.preProcessAndClose, + aggFn.inputProjection) + case _ => + val ordinals = (ix until ix + aggFn.updateAggregates.length) + aggOrdinals ++= ordinals + ix += ordinals.length + val updateAggs = aggFn.updateAggregates + postStepDataTypes ++= updateAggs.map(_.dataType) + cudfAggregates ++= updateAggs + preStep ++= aggFn.inputProjection + 
postStep ++= aggFn.postUpdate + postStepAttr ++= aggFn.postUpdateAttr + } } else { - val ordinals = (ix until ix + aggFn.mergeAggregates.length) - aggOrdinals ++= ordinals - ix += ordinals.length - val mergeAggs = aggFn.mergeAggregates - postStepDataTypes ++= mergeAggs.map(_.dataType) - cudfAggregates ++= mergeAggs - preStep ++= aggFn.preMerge - postStep ++= aggFn.postMerge - postStepAttr ++= aggFn.postMergeAttr + aggFn match { + case advFn: GpuAdvancedAggregateFunction => + val advMergeAgg = advFn.mergeAggregate() + addAdvancedAgg(advMergeAgg, ord, advMergeAgg.preStepAndClose, + advFn.aggBufferAttributes) + case _ => + val ordinals = (ix until ix + aggFn.mergeAggregates.length) + aggOrdinals ++= ordinals + ix += ordinals.length + val mergeAggs = aggFn.mergeAggregates + postStepDataTypes ++= mergeAggs.map(_.dataType) + cudfAggregates ++= mergeAggs + preStep ++= aggFn.preMerge + postStep ++= aggFn.postMerge + postStepAttr ++= aggFn.postMergeAttr + } } } + private val advAggStart = preStep.length + // a bound expression that is applied before the cuDF aggregate private val preStepAttributes = if (forceMerge) { aggBufferAttributes } else { inputAttributes } - val preStepBound = GpuBindReferences.bindGpuReferencesTiered(preStep.toList, + + // From "preStep" to "postStep"(including "aggregate/reduce"), it splits + // aggregates into two parts, built-in ones and advanced ones, and processes them + // separately. Then combines the outputs into a single output batch, and the + // built-in columns always come before the advanced ones. For example, there are 3 + // aggregates, + // "max(a), advanced(b), min(a)", + // the columns in the "preProcess" output batch is like (assume "advanced(b)" produces + // two columns.) + // | group columns | col_max_a | col_min_a | col1_advanced_b | col2_advanced_b | + // + // Also the output batches of "reduce/aggregate" and "postStep" have the same layout. 
+ val preStepBound = GpuBindReferences.bindGpuReferencesTiered( + preStep.toList ++ advPreStepArgs.flatten, // Append the arguments of advanced aggs preStepAttributes.toList, conf) // a bound expression that is applied after the cuDF aggregate private val postStepBound = GpuBindReferences.bindGpuReferencesTiered(postStep.toList, postStepAttr.toList, conf) + private val advPreStepAndArgLens = advPreSteps.zip(advPreStepArgs.map(_.length)) + + /** + * Perform the "preStep" including advanced aggregates, and return the combined + * result and the output columns number of every advanced aggregate. + * The input batch "cb" contains the pre-processed columns of the built-in + * aggregates and the arguments columns of the advanced aggregates. + * + * The returned column numbers are used to build the input for each advanced + * aggregate in the following "reduce" or "aggregate" operation. For example, + * input: | b | c | + * agg: advanced(b), advanced(c) + * output: | b_out1 | b_out2 | c_out1 |, {2, 1} + * + * "advanced(b)" produces 2 columns while "advanced(c)" returns only one column. + */ + private def preProcessWithAdvancedAggsAndClose(cb: ColumnarBatch): (ColumnarBatch, Seq[Int]) = { + val cols = GpuColumnVector.extractColumns(cb) + closeOnExcept(new ArrayBuffer[GpuColumnVector]()) { outCols => + // 1) Extract the pre-processed columns and append to the output + outCols ++= cols.slice(0, advAggStart) + // 2) Extract the arguments columns and process them by the advanced aggregates. + // 3) Append the results to the output + val argsCols = cols.slice(advAggStart, cols.length) + closeOnExcept(argsCols) { _ => + var idx = 0 + val outLens = new ArrayBuffer[Int]() + advPreStepAndArgLens.foreach { case (advPreProcess, argsLen) => + val endIdx = idx + argsLen + // advPreProcess is supposed to close the input columns "cols". 
+ val args = argsCols.slice(idx, endIdx) + (idx until endIdx).foreach { i => + argsCols(i) = null // Avoid duplicate close on exceptions + } + val ret = advPreProcess(cb.numRows(), args) + outCols ++= ret + outLens += ret.length + idx = endIdx + } + require(idx == argsCols.length) // all the columns should be consumed + (new ColumnarBatch(outCols.toArray, cb.numRows()), outLens.toSeq) + } + } + } + /** * Apply the "pre" step: preMerge for merge, or pass-through in the update case * * @param toAggregateBatch - input (to the agg) batch from the child directly in the * merge case, or from the `inputProjection` in the update case. - * @return a pre-processed batch that can be later cuDF aggregated + * @param metrics - the GpuHashAggregateMetrics for this aggregation. + * @return a pre-processed batch that can be later cuDF aggregated, along with the + * output sizes for advanced aggregates. */ def preProcess( toAggregateBatch: ColumnarBatch, - metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = { + metrics: GpuHashAggregateMetrics): (SpillableColumnarBatch, Seq[Int]) = { val inputBatch = SpillableColumnarBatch(toAggregateBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) val projectedCb = NvtxRegistry.AGG_PRE_PROCESS { preStepBound.projectAndCloseWithRetrySingleBatch(inputBatch) } - SpillableColumnarBatch( - projectedCb, - SpillPriorities.ACTIVE_BATCHING_PRIORITY) + val (retCb, outLens) = if (advPreStepAndArgLens.nonEmpty) { + preProcessWithAdvancedAggsAndClose(projectedCb) + } else { + (projectedCb, Seq.empty) + } + (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens) } - def aggregate(preProcessed: ColumnarBatch, numAggs: GpuMetric): ColumnarBatch = { + def aggregate(preProcessed: ColumnarBatch, + advArgLens: Seq[Int], numAggs: GpuMetric): (ColumnarBatch, Seq[Int]) = { val ret = if (groupingOrdinals.nonEmpty) { - performGroupByAggregation(preProcessed) + performGroupByAggregation(preProcessed, advArgLens) } else { - 
performReduction(preProcessed) + performReduction(preProcessed, advArgLens) } numAggs += 1 ret } def aggregateWithoutCombine(metrics: GpuHashAggregateMetrics, - preProcessed: Iterator[SpillableColumnarBatch]): Iterator[SpillableColumnarBatch] = { + preProcessed: Iterator[(SpillableColumnarBatch, Seq[Int])] + ): Iterator[(SpillableColumnarBatch, Seq[Int])] = { val computeAggTime = metrics.computeAggTime val opTime = metrics.opTime val numAggs = metrics.numAggOps - preProcessed.flatMap { sb => + preProcessed.flatMap { case (sb, argLens) => withRetry(sb, splitSpillableInHalfByRows) { preProcessedAttempt => NvtxIdWithMetrics(NvtxRegistry.COMPUTE_AGGREGATE, computeAggTime, opTime) { withResource(preProcessedAttempt.getColumnarBatch()) { cb => - SpillableColumnarBatch( - aggregate(cb, numAggs), - SpillPriorities.ACTIVE_BATCHING_PRIORITY) + val (retCb, outLens) = aggregate(cb, argLens, numAggs) + (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens) } } } @@ -502,26 +599,28 @@ class AggHelper( def aggregate( metrics: GpuHashAggregateMetrics, - preProcessed: SpillableColumnarBatch): SpillableColumnarBatch = { + preProcessed: SpillableColumnarBatch, + advArgLens: Seq[Int]): (SpillableColumnarBatch, Seq[Int]) = { val numAggs = metrics.numAggOps val aggregatedSeq = withRetry(preProcessed, splitSpillableInHalfByRows) { preProcessedAttempt => withResource(preProcessedAttempt.getColumnarBatch()) { cb => - SpillableColumnarBatch( - aggregate(cb, numAggs), - SpillPriorities.ACTIVE_BATCHING_PRIORITY) + val (retCb, outLens) = aggregate(cb, advArgLens, numAggs) + (SpillableColumnarBatch(retCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY), outLens) } }.toSeq // We need to merge the aggregated batches into 1 before calling post process, // if the aggregate code had to split on a retry if (aggregatedSeq.size > 1) { - val concatted = concatenateBatchesWithRetry(metrics, aggregatedSeq) + val concatted = concatenateBatchesWithRetry(metrics, 
aggregatedSeq.map(_._1)) withRetryNoSplit(concatted) { attempt => + val outLens = aggregatedSeq.head._2 withResource(attempt.getColumnarBatch()) { cb => - SpillableColumnarBatch( - aggregate(cb, numAggs), + val scb = SpillableColumnarBatch( + aggregate(cb, outLens, numAggs)._1, SpillPriorities.ACTIVE_BATCHING_PRIORITY) + (scb, outLens) } } } else { @@ -530,24 +629,39 @@ class AggHelper( } /** - * Invoke reduction functions as defined in each `CudfAggreagte` + * Invoke reduction functions as defined in each `CudfAggregate` * * @param preProcessed - a batch after the "pre" step - * @return + * @param advArgLens - argument sizes of advanced aggregates. + * @return a reduced batch and the output sizes of advanced aggregates. */ - def performReduction(preProcessed: ColumnarBatch): ColumnarBatch = { + def performReduction( + preProcessed: ColumnarBatch, + advArgLens: Seq[Int]): (ColumnarBatch, Seq[Int]) = { + val reduceRowNum = 1 NvtxRegistry.AGG_REDUCE { - val cvs = mutable.ArrayBuffer[GpuColumnVector]() - cudfAggregates.zipWithIndex.foreach { case (cudfAgg, ix) => - val aggFn = cudfAgg.reductionAggregate + closeOnExcept(new mutable.ArrayBuffer[GpuColumnVector]()) { cvs => val cols = GpuColumnVector.extractColumns(preProcessed) - val reductionCol = cols(aggOrdinals(ix)) - withResource(aggFn(reductionCol.getBase)) { res => - cvs += GpuColumnVector.from( - cudf.ColumnVector.fromScalar(res, 1), cudfAgg.dataType) + cudfAggregates.zip(aggOrdinals).foreach { case (cudfAgg, ix) => + withResource(cudfAgg.reductionAggregate(cols(ix).getBase)) { res => + cvs += GpuColumnVector.from(res, reduceRowNum, cudfAgg.dataType) + } } + + // Process the advanced aggregates + var accLen = advAggStart + val outLens = new mutable.ArrayBuffer[Int]() + advCudfAggregates.zip(advArgLens).foreach { case ((advAgg, _), argLen) => + val argCols = (accLen until accLen + argLen).map(cols).toArray + cvs ++= withResource(advAgg.reduce(preProcessed.numRows, argCols)) { sas => + // Convert a scalar to a one 
row column for returning + outLens += sas.length + accLen += argLen + sas.safeMap(GpuColumnVector.from(_, reduceRowNum)) + } + } + (new ColumnarBatch(cvs.toArray, reduceRowNum), outLens.toSeq) } - new ColumnarBatch(cvs.toArray, 1) } } @@ -555,9 +669,12 @@ class AggHelper( * Used to produce a group-by aggregate * * @param preProcessed the batch after the "pre" step - * @return a Table that has been cuDF aggregated + * @param advArgLens - argument sizes of advanced aggregates. + * @return a Table that has been cuDF aggregated, along with the + * output sizes for advanced aggregates. */ - def performGroupByAggregation(preProcessed: ColumnarBatch): ColumnarBatch = { + def performGroupByAggregation(preProcessed: ColumnarBatch, + advArgLens: Seq[Int]): (ColumnarBatch, Seq[Int]) = { NvtxRegistry.AGG_GROUPBY { withResource(GpuColumnVector.from(preProcessed)) { preProcessedTbl => val groupOptions = cudf.GroupByOptions.builder() @@ -569,18 +686,128 @@ class AggHelper( case (cudfAgg, ord) => cudfAgg.groupByAggregate.onColumn(ord) } + // process advanced aggregates + var accArgStart = advAggStart + val advOutLens = new ArrayBuffer[Int]() + val advAggsOnColumns = advCudfAggregates.zip(advArgLens).flatMap { + case ((advAgg, _), argLen) => + val ret = if (advAgg.supportAdvanced) { + // Should not come here + throw new UnsupportedOperationException("Advanced aggregate is " + + "not supported yet") + } else { + advAgg.aggregate(Array.range(accArgStart, accArgStart + argLen)) + } + accArgStart += argLen + advOutLens += ret.length + ret + } + // perform the aggregate val aggTbl = preProcessedTbl .groupBy(groupOptions, groupingOrdinals: _*) - .aggregate(cudfAggsOnColumn.toSeq: _*) + .aggregate((cudfAggsOnColumn ++ advAggsOnColumns).toSeq: _*) withResource(aggTbl) { _ => - GpuColumnVector.from(aggTbl, postStepDataTypes.toArray) + // The output types of advanced aggs can not be predicated, instead need to + // infer them from the output columns. 
+ val advAggTypes = (postStepDataTypes.length until aggTbl.getNumberOfColumns).map { + advColIx => AdvAggTypeUtils.infer(aggTbl.getColumn(advColIx)) + } + (GpuColumnVector.from(aggTbl, (postStepDataTypes ++ advAggTypes).toArray), + advOutLens.toSeq) } } } } + /** Similar as "preProcessWithAdvancedAggsAndClose" but perform the "postStep". */ + private def postProcessWithAdvancedAggsAndClose( + scb: SpillableColumnarBatch, + advArgLens: Seq[Int]): ColumnarBatch = { + // 1) Split the argument columns from the built-in aggregated columns + val (postedCb, argsCb) = withResource(scb) { _ => + withResource(scb.getColumnarBatch()) { cb => + // 2) Perform the post-process for the built-in aggregates. + val nonArgs = SpillableColumnarBatch( + GpuColumnVector.sliceColumns(cb, 0, advAggStart), + SpillPriorities.ACTIVE_BATCHING_PRIORITY) + closeOnExcept(postStepBound.projectAndCloseWithRetrySingleBatch(nonArgs)) { proCb => + (proCb, GpuColumnVector.sliceColumns(cb, advAggStart, cb.numCols())) + } + } + } + // 3) Perform the post-process for the advanced aggregates. + val outCols = new ArrayBuffer[Array[GpuColumnVector]]() + closeOnExcept(postedCb) { _ => + closeOnExcept(GpuColumnVector.extractColumns(argsCb)) { cols => + var idx = 0 + try { + advCudfAggregates.zip(advArgLens).foreach { case ((advAgg, _), argsLen) => + val endIdx = idx + argsLen + val args = cols.slice(idx, endIdx) + (idx until endIdx).foreach { i => + cols(i) = null // Avoid duplicate close on exceptions + } + // postStepAndClose is supposed to close the input columns "cols". + outCols += advAgg.postStepAndClose(scb.numRows(), args) + idx = endIdx + } + require(idx == cols.length) // all the columns should be consumed + } catch { + case t: Throwable => + outCols.flatten.safeClose(t) + throw t + } + } + } + // 4) Shuffle the columns in the original order. 
+ mergeWithOriginalOrderAndClose(postedCb, outCols.toSeq) + } + + /** + * The given "batch" contains only the post-processed columns of the built-in + * aggregates, and the "advsCols" is the output of all the advanced aggregates. + * + * For easier process with advanced aggregates, it reorders the input aggregates + * to separate the advanced ones from the built-in ones earlier at the "preStep". + * And this should break the Spark's expectation on the output. + * + * So this function will merge the two parts and reorder them back to align with + * the original order of the aggregates to make sure the result is safe to return + * to Spark. + */ + private def mergeWithOriginalOrderAndClose( + batch: ColumnarBatch, + advsCols: Seq[Array[GpuColumnVector]]): ColumnarBatch = { + closeOnExcept(new ArrayBuffer[GpuColumnVector]()) { outCols => + var colIx = groupingAttributes.length + // first, group columns, move to out directly + outCols ++= (0 until colIx).map(batch.column(_).asInstanceOf[GpuColumnVector]) + var aggIx = 0 + advCudfAggregates.zip(advsCols).foreach { case ((_, advOrd), advCols) => + require(aggIx <= advOrd) + // 1 move all the columns before this advanced agg to out + // One non-advanced agg one column + val colsNum = advOrd - aggIx + outCols ++= (colIx until colIx + colsNum).map( ci => + batch.column(ci).asInstanceOf[GpuColumnVector] + ) + colIx += colsNum + // 2 append the advanced columns to out + outCols ++= advCols + aggIx = advOrd + 1 // + 1 to skip the advanced agg itself + } + // move remaining ones to out + if (colIx < batch.numCols()) { + outCols ++= (colIx until batch.numCols()).map(i => + batch.column(i).asInstanceOf[GpuColumnVector] + ) + } + new ColumnarBatch(outCols.toArray, batch.numRows()) + } + } + /** * Used to produce the outbound batch from the aggregate that could be * shuffled or could be passed through the evaluateExpression if we are in the final @@ -588,32 +815,32 @@ class AggHelper( * It takes a cuDF aggregated batch and applies 
the "post" step: * postUpdate for update, or postMerge for merge * - * @param resultBatch - cuDF aggregated batch + * @param aggregatedSpillable - cuDF aggregated batch + * @param advArgLens - argument sizes of advanced aggregates. * @return output batch from the aggregate */ def postProcess( aggregatedSpillable: SpillableColumnarBatch, + advArgLens: Seq[Int], metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = { - val postProcessed = NvtxRegistry.AGG_POST_PROCESS { - postStepBound.projectAndCloseWithRetrySingleBatch(aggregatedSpillable) + val computeTime = metrics.computeAggTime + val opTime = metrics.opTime + val postProcessed = NvtxIdWithMetrics(NvtxRegistry.POST_PROCESS_AGG, computeTime, opTime) { + if (advCudfAggregates.nonEmpty) { + postProcessWithAdvancedAggsAndClose(aggregatedSpillable, advArgLens) + } else { + postStepBound.projectAndCloseWithRetrySingleBatch(aggregatedSpillable) + } } SpillableColumnarBatch( postProcessed, SpillPriorities.ACTIVE_BATCHING_PRIORITY) } - def postProcess(input: Iterator[SpillableColumnarBatch], + def postProcess( + input: Iterator[(SpillableColumnarBatch, Seq[Int])], metrics: GpuHashAggregateMetrics): Iterator[SpillableColumnarBatch] = { - val computeAggTime = metrics.computeAggTime - val opTime = metrics.opTime - input.map { aggregated => - NvtxIdWithMetrics(NvtxRegistry.POST_PROCESS_AGG, computeAggTime, opTime) { - val postProcessed = postStepBound.projectAndCloseWithRetrySingleBatch(aggregated) - SpillableColumnarBatch( - postProcessed, - SpillPriorities.ACTIVE_BATCHING_PRIORITY) - } - } + input.map { case (aggregated, advArgLens) => postProcess(aggregated, advArgLens, metrics) } } } @@ -627,8 +854,9 @@ object GpuAggregateIterator extends Logging { def aggregate( helper: AggHelper, preProcessed: SpillableColumnarBatch, - metrics: GpuHashAggregateMetrics): SpillableColumnarBatch = { - helper.aggregate(metrics, preProcessed) + advArgLens: Seq[Int], + metrics: GpuHashAggregateMetrics): (SpillableColumnarBatch, 
Seq[Int]) = { + helper.aggregate(metrics, preProcessed, advArgLens) } /** @@ -652,33 +880,25 @@ object GpuAggregateIterator extends Logging { // in some cases casting and in others creating a struct (MERGE_M2 for instance, // requires a struct) // OOM retry happens within the projection in preProcess - val preProcessed = helper.preProcess(inputBatch, metrics) + val (preProcessed, advArgLens) = helper.preProcess(inputBatch, metrics) // 2) perform the aggregation // OOM retry means we could get a list of batches - val aggregatedSpillable = aggregate(helper, preProcessed, metrics) + val (aggregatedSpillable, advLens) = aggregate(helper, preProcessed, advArgLens, metrics) // 3) a post-processing step required in some scenarios, casting or picking // apart a struct - helper.postProcess(aggregatedSpillable, metrics) + helper.postProcess(aggregatedSpillable, advLens, metrics) } } def computeAggregateWithoutPreprocessAndClose( metrics: GpuHashAggregateMetrics, - inputBatches: Iterator[ColumnarBatch], + spillableInput: Iterator[(SpillableColumnarBatch, Seq[Int])], helper: AggHelper): Iterator[SpillableColumnarBatch] = { - val computeAggTime = metrics.computeAggTime - val opTime = metrics.opTime // 1) a pre-processing step required before we go into the cuDF aggregate, This has already // been done and is skipped - val spillableInput = inputBatches.map { cb => - withResource(new MetricRange(computeAggTime, opTime)) { _ => - SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_BATCHING_PRIORITY) - } - } - // 2) perform the aggregation // OOM retry means we could get a list of batches val aggregatedSpillable = helper.aggregateWithoutCombine(metrics, spillableInput) @@ -730,10 +950,7 @@ object GpuAggFirstPassIterator { aggHelper: AggHelper, metrics: GpuHashAggregateMetrics ): Iterator[SpillableColumnarBatch] = { - val preprocessProjectIter = cbIter.map { cb => - val sb = SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) - 
aggHelper.preStepBound.projectAndCloseWithRetrySingleBatch(sb) - } + val preprocessProjectIter = cbIter.map(aggHelper.preProcess(_, metrics)) computeAggregateWithoutPreprocessAndClose(metrics, preprocessProjectIter, aggHelper) } } @@ -752,7 +969,7 @@ object GpuAggFirstPassIterator { // (GpuAverage => CudfSum/CudfCount) // * boundResultReferences: project the result expressions Spark expects in the output. case class BoundExpressionsModeAggregates( - boundFinalProjections: Option[Seq[GpuExpression]], + boundFinalProjections: Option[(Seq[GpuExpression], Seq[AggregateUtils.AdvancedAggHandler])], boundResultReferences: Seq[Expression]) object GpuAggFinalPassIterator { @@ -776,9 +993,30 @@ object GpuAggFinalPassIterator { aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) val boundFinalProjections = if (modeInfo.hasFinalMode || modeInfo.hasCompleteMode) { - val finalProjections = groupingAttributes ++ - aggregateExpressions.map(_.aggregateFunction.evaluateExpression) - Some(GpuBindReferences.bindGpuReferences(finalProjections, aggBufferAttributes)) + var idx = groupingAttributes.length + val advFns = new ArrayBuffer[AggregateUtils.AdvancedAggHandler]() + val finalProjections = groupingAttributes ++ aggregateExpressions.flatMap { expr => + val aggFn = expr.aggregateFunction + val ret = aggFn match { + case advFn: GpuAdvancedAggregateFunction => + // Collect the "argument start" and "argument length" for every advanced + // aggregate. 
+ val postProcess: AggregateUtils.AdvancedStep = (numRows, args) => { + Array(advFn.postProcessAndClose(numRows, args)) + } + val aggBufLen = aggFn.aggBufferAttributes.length + advFns += ((postProcess, idx, aggBufLen)) + idx += aggBufLen + // Put the arguments of advance agg to the output batch + advFn.aggBufferAttributes.asInstanceOf[Seq[Expression]] + case _ => + idx += 1 + Seq(aggFn.evaluateExpression) + } + ret + } + Some((GpuBindReferences.bindGpuReferences(finalProjections, aggBufferAttributes), + advFns.toSeq)) } else { None } @@ -831,9 +1069,14 @@ object GpuAggFinalPassIterator { val opTime = metrics.opTime cbIter.map { batch => NvtxIdWithMetrics(NvtxRegistry.FINALIZE_AGG, aggTime, opTime) { - val finalBatch = boundExpressions.boundFinalProjections.map { exprs => - GpuProjectExec.projectAndCloseWithRetrySingleBatch( + val finalBatch = boundExpressions.boundFinalProjections.map { case (exprs, advFns) => + val cb = GpuProjectExec.projectAndCloseWithRetrySingleBatch( SpillableColumnarBatch(batch, SpillPriorities.ACTIVE_BATCHING_PRIORITY), exprs) + if (advFns.nonEmpty) { + processAdvancedAggsAndClose(cb, advFns) + } else { + cb + } }.getOrElse(batch) val finalSCB = SpillableColumnarBatch(finalBatch, SpillPriorities.ACTIVE_BATCHING_PRIORITY) @@ -849,15 +1092,63 @@ object GpuAggFinalPassIterator { val opTime = metrics.opTime sbIter.map { sb => NvtxIdWithMetrics(NvtxRegistry.FINALIZE_AGG, aggTime, opTime) { - val finalBatch = boundExpressions.boundFinalProjections.map { exprs => - SpillableColumnarBatch( - GpuProjectExec.projectAndCloseWithRetrySingleBatch(sb, exprs), - SpillPriorities.ACTIVE_BATCHING_PRIORITY) + val finalBatch = boundExpressions.boundFinalProjections.map { case (exprs, advFns) => + val cb = GpuProjectExec.projectAndCloseWithRetrySingleBatch(sb, exprs) + val mixedCb = if (advFns.nonEmpty) { + processAdvancedAggsAndClose(cb, advFns) + } else { + cb + } + SpillableColumnarBatch(mixedCb, SpillPriorities.ACTIVE_BATCHING_PRIORITY) }.getOrElse(sb) 
reorderFinalBatch(finalBatch, boundExpressions, metrics) } } } + + /** + * The input batch "inputCb" contains the final columns of the built-in aggregates + * and the argument columns of the advanced aggregates. + * + * This function extracts the argument columns from the input batch, and perform + * the final "postProcess" action, then insert the result columns into the output + * batch at the correct position for each advanced aggregate. + * It also passes through the final columns of built-in aggregates to the output + * batch. + */ + private[this] def processAdvancedAggsAndClose(inputCb: ColumnarBatch, + processOps: Seq[AggregateUtils.AdvancedAggHandler]): ColumnarBatch = { + closeOnExcept(GpuColumnVector.extractColumns(inputCb)) { cols => + val outCols = new ArrayBuffer[GpuColumnVector]() + var idx = 0 + closeOnExcept(outCols) { _ => + processOps.foreach { case (advAggAndClose, inputStartPos, inputLen) => + require(idx <= inputStartPos) + val endIdx = inputStartPos + inputLen + // 1 Move non-advanced agg columns to out + (idx until inputStartPos).foreach { i => + outCols += cols(i) + cols(i) = null // avoid duplicate close on exceptions + } + val args = cols.slice(inputStartPos, endIdx) + (inputStartPos until endIdx).foreach { i => + cols(i) = null // avoid duplicate close on exceptions + } + // 2 process the current advanced agg and append the results to out + outCols ++= advAggAndClose(inputCb.numRows(), args) + idx = endIdx + } // end of "processOps.foreach" + + if (idx < cols.length) { // Move remaining columns to out directly + (idx until cols.length).foreach { i => + outCols += cols(i) + cols(i) = null // avoid duplicate close on exceptions + } + } + } // end of "closeOnExcept(outCols)" + new ColumnarBatch(outCols.toArray, inputCb.numRows()) + } + } } @@ -1106,19 +1397,27 @@ class GpuMergeAggregateIterator( */ private def generateEmptyReductionBatch(): ColumnarBatch = { val aggregateFunctions = aggregateExpressions.map(_.aggregateFunction) - val 
defaultValues = - aggregateFunctions.flatMap(_.initialValues) - // We have to grab the semaphore in this scenario, since this is a reduction that produces - // rows on the GPU out of empty input, meaning that if a batch has 0 rows, a new single - // row is getting created with 0 as the count (if count is the operation), and other default - // values. - GpuSemaphore.acquireIfNecessary(TaskContext.get()) - val vecs = defaultValues.safeMap { ref => - withResource(GpuScalar.from(ref.asInstanceOf[GpuLiteral].value, ref.dataType)) { - scalar => GpuColumnVector.from(scalar, 1, ref.dataType) + val defaultValues = new ArrayBuffer[GpuScalar]() + closeOnExcept(defaultValues) { _ => + GpuSemaphore.acquireIfNecessary(TaskContext.get()) + // We have to grab the semaphore in this scenario, since this is a reduction that produces + // rows on the GPU out of empty input, meaning that if a batch has 0 rows, a new single + // row is getting created with 0 as the count (if count is the operation), and other default + // values. 
+ aggregateFunctions.foreach { + case advFunc: GpuAdvancedAggregateFunction => + defaultValues ++= advFunc.defaultValues + case aggFunc => + defaultValues ++= aggFunc.initialValues.safeMap { case GpuLiteral(any, dt) => + GpuScalar(any, dt) + } } } - new ColumnarBatch(vecs.toArray, 1) + withResource(defaultValues) { _ => + val numRows = 1 + val vecs = defaultValues.toSeq.safeMap(GpuColumnVector.from(_, numRows)) + new ColumnarBatch(vecs.toArray, numRows) + } } } @@ -1487,7 +1786,6 @@ abstract class GpuTypedImperativeSupportedAggregateExecMeta[INPUT <: BaseAggrega allowSinglePassAgg = false, allowNonFullyAggregatedOutput = false, 1) - } else { super.convertToGpu() } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 21bc17c4b5d..00e2b2bef44 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2524,6 +2524,8 @@ object GpuOverrides extends Logging { a.evalType, a.udfDeterministic, a.resultId) }), GpuScalaUDFMeta.exprMeta, + GpuUDAFMeta.scalaUDAFMeta, + GpuUDAFMeta.scalaAggregatorMeta, expr[Rand]( "Generate a random column with i.i.d. 
uniformly distributed values in [0, 1)", ExprChecks.projectOnly(TypeSig.DOUBLE, TypeSig.DOUBLE, diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala index 38af5ee1113..3f3bf991fa9 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/HiveProviderImpl.scala @@ -20,18 +20,21 @@ import java.nio.charset.Charset import java.time.ZoneId import com.google.common.base.Charsets -import com.nvidia.spark.RapidsUDF +import com.nvidia.spark.{RapidsUDAF, RapidsUDF} import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.GpuUserDefinedFunction.udfTypeSig +import org.apache.hadoop.hive.ql.exec.UDAF +import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, HiveTableRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.hive.{HiveGenericUDF, HiveSimpleUDF} +import org.apache.spark.sql.hive.{HiveGenericUDF, HiveSimpleUDF, HiveUDAFFunction} import org.apache.spark.sql.hive.execution.HiveTableScanExec import org.apache.spark.sql.hive.rapids.GpuHiveTextFileUtils._ import org.apache.spark.sql.hive.rapids.shims.HiveProviderCmdShims import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.aggregate.{AdvAggTypeUtils, CpuToGpuAggregateBufferConverter, GpuToCpuAggregateBufferConverter} import org.apache.spark.sql.rapids.execution.TrampolineUtil import org.apache.spark.sql.rapids.shims.SparkSessionUtils import org.apache.spark.sql.types._ @@ -128,6 +131,67 @@ class HiveProviderImpl extends HiveProviderCmdShims { childExprs.map(_.convertToGpu())) } } + }), + GpuOverrides.expr[HiveUDAFFunction]( + "Hive user defined aggregate function, the 
UDAF can choose to implement" + + " a RAPIDS accelerated interface to get better performance", + ExprChecks.reductionAndGroupByAgg( + udfTypeSig, + TypeSig.all, + repeatingParamCheck = Some(RepeatingParamCheck("param", udfTypeSig, TypeSig.all))), + (a, conf, p, r) => new TypedImperativeAggExprMeta[HiveUDAFFunction](a, conf, p, r) { + + @scala.annotation.nowarn("msg=is deprecated") + private val opRapidsFunc = { + val hiveUDAF = if (a.isUDAFBridgeRequired) { + a.funcWrapper.createFunction[UDAF]() + } else { + a.funcWrapper.createFunction[AbstractGenericUDAFResolver]() + } + hiveUDAF match { + case rapidsUDAF: RapidsUDAF => Some(rapidsUDAF) + case _ => None + } + } + + override def tagAggForGpu(): Unit = { + if (opRapidsFunc.isEmpty) { + willNotWorkOnGpu(s"Hive UDAF ${a.name} implemented by " + + s"${a.funcWrapper.functionClassName} does not provide a GPU implementation ") + } + } + + override def aggBufferAttribute: AttributeReference = { + opRapidsFunc.map { rapidsUDAF => + AdvAggTypeUtils.attrFromTypes(expr.name, rapidsUDAF.aggBufferTypes()) + }.getOrElse( + // opRapidsFunc is None, so it will fallback to CPU, use the CPU one. 
+ expr.aggBufferAttributes.head + ) + } + + override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = { + GpuHiveUDAFFunction( + a.name, + a.funcWrapper, + childExprs, + a.nullable, + a.dataType, + a.isUDAFBridgeRequired) + } + + override val supportBufferConversion: Boolean = true + + override def createCpuToGpuBufferConverter(): CpuToGpuAggregateBufferConverter = { + (child: Expression) => + C2gHiveUDAFBufferTransition(child, HiveUDAFUtils.cpuAggBufferType(a), + aggBufferAttribute.dataType) + } + + override def createGpuToCpuBufferConverter(): GpuToCpuAggregateBufferConverter = { + (child: Expression) => + G2cHiveUDAFBufferTransition(child, HiveUDAFUtils.cpuAggBufferType(a)) + } }) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala index 52dd46d8d80..d09b9d5ece8 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/hiveUDFs.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,14 +16,17 @@
 
 package org.apache.spark.sql.hive.rapids
 
-import com.nvidia.spark.RapidsUDF
+import com.nvidia.spark.{RapidsUDAF, RapidsUDF}
 import com.nvidia.spark.rapids.GpuUserDefinedFunction
-import org.apache.hadoop.hive.ql.exec.UDF
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+import org.apache.hadoop.hive.ql.exec.{UDAF, UDF}
+import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF}
 
-import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow, SafeProjection, UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.hive.HiveUDAFFunction
+import org.apache.spark.sql.rapids.aggregate.{CpuToGpuBufferTransition, GpuToCpuBufferTransition, GpuTypedUDAFFunctionBase}
+import org.apache.spark.sql.types.{DataType, StructType}
 
 /** Common implementation across Hive UDFs */
 trait GpuHiveUDFBase extends GpuUserDefinedFunction {
@@ -68,3 +71,103 @@ case class GpuHiveGenericUDF(
   override lazy val function: RapidsUDF = funcWrapper.createFunction[GenericUDF]()
     .asInstanceOf[RapidsUDF]
 }
+
+case class GpuHiveUDAFFunction(
+    name: String,
+    funcWrapper: HiveFunctionWrapper,
+    children: Seq[Expression],
+    nullable: Boolean,
+    dataType: DataType,
+    isUDAFBridgeRequired: Boolean) extends GpuTypedUDAFFunctionBase {
+
+  @scala.annotation.nowarn("msg=is deprecated")
+  @transient
+  override lazy val function: RapidsUDAF = if (isUDAFBridgeRequired) {
+    funcWrapper.createFunction[UDAF]().asInstanceOf[RapidsUDAF]
+  } else {
+    funcWrapper.createFunction[AbstractGenericUDAFResolver]().asInstanceOf[RapidsUDAF]
+  }
+}
+
+object HiveUDAFUtils {
+  private[rapids] def cpuAggBufferType(hiveUDAF: HiveUDAFFunction): DataType = {
+    try {
+      // 'partialResultDataType' is private, so have to get it via the reflection.
+      val pdtMethod = hiveUDAF.getClass.getMethod(
+        "org$apache$spark$sql$hive$HiveUDAFFunction$$partialResultDataType")
+      pdtMethod.invoke(hiveUDAF).asInstanceOf[DataType]
+    } catch {
+      case t: Throwable => throw new IllegalStateException("Can not get the aggregate " +
+        "buffer type via 'partialResultDataType' from CPU HiveUDAFFunction", t)
+    }
+  }
+}
+
+/**
+ * Serializes the GPU aggregate buffer row into the bytes of an UnsafeRow. When the CPU
+ * buffer type is a struct, the row is first wrapped as the single column of an outer row.
+ */
+case class G2cHiveUDAFBufferTransition(
+    child: Expression,
+    cpuBufType: DataType) extends GpuToCpuBufferTransition {
+  private lazy val unsafeProj = if (cpuBufType.isInstanceOf[StructType]) {
+    // GPU always uses a struct type for agg buffer, but CPU does not, depending on
+    // the users implementation. So if a struct is used by CPU, then no need to
+    // flatten it here.
+    UnsafeProjection.create(Array(child.dataType))
+  } else {
+    UnsafeProjection.create(child.dataType.asInstanceOf[StructType].map(_.dataType).toArray)
+  }
+
+  private lazy val wrapRow: InternalRow => InternalRow =
+    if (cpuBufType.isInstanceOf[StructType]) {
+      // CPU expects a single struct column
+      val wrappedRow = new GenericInternalRow(1)
+      inputRow => {
+        wrappedRow.update(0, inputRow)
+        wrappedRow
+      }
+    } else {
+      identity[InternalRow]
+    }
+
+  override protected def nullSafeEval(input: Any): Array[Byte] = {
+    unsafeProj(wrapRow(input.asInstanceOf[InternalRow])).getBytes
+  }
+}
+
+/**
+ * Reads the bytes of an UnsafeRow back into a row for the GPU aggregate buffer. When the
+ * CPU buffer type is a struct, its single struct column is unwrapped.
+ */
+case class C2gHiveUDAFBufferTransition(
+    child: Expression,
+    cpuBufType: DataType,
+    gpuType: DataType) extends CpuToGpuBufferTransition {
+  override val dataType: DataType = gpuType
+
+  // GPU always uses a struct type for agg buffer, but CPU does not, depending on
+  // the users implementation. So if a struct is used by CPU, then no need to
+  // flatten it here.
+  private lazy val projTypes = if (cpuBufType.isInstanceOf[StructType]) {
+    Array(gpuType)
+  } else {
+    gpuType.asInstanceOf[StructType].map(_.dataType).toArray
+  }
+  private lazy val row = new UnsafeRow(projTypes.length)
+  // Build the projection once and reuse it for every row; creating a new
+  // SafeProjection per input row is needlessly expensive.
+  private lazy val safeProj = SafeProjection.create(projTypes)
+  private lazy val objectProj: InternalRow => InternalRow =
+    if (cpuBufType.isInstanceOf[StructType]) {
+      inputRow => safeProj(inputRow).get(0, gpuType).asInstanceOf[InternalRow]
+    } else {
+      inputRow => safeProj(inputRow)
+    }
+
+  override protected def nullSafeEval(input: Any): InternalRow = {
+    val bytes = input.asInstanceOf[Array[Byte]]
+    row.pointTo(bytes, bytes.length)
+    objectProj(row)
+  }
+}
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
index 3a6e940196f..e048adaf55b 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuScalaUDF.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@ package org.apache.spark.sql.rapids import java.lang.invoke.SerializedLambda +import scala.reflect.ClassTag + import com.nvidia.spark.RapidsUDF import com.nvidia.spark.rapids._ @@ -50,7 +52,7 @@ object GpuScalaUDFMeta { repeatingParamCheck = Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))), (expr, conf, p, r) => new ExprMeta(expr, conf, p, r) { - lazy val opRapidsFunc = GpuScalaUDF.getRapidsUDFInstance(expr.function) + lazy val opRapidsFunc = GpuScalaUDF.getRapidsUDFInstance[RapidsUDF](expr.function) override def tagExprForGpu(): Unit = { if (opRapidsFunc.isEmpty && !this.conf.isCpuBasedUDFEnabled) { @@ -615,9 +617,9 @@ object GpuScalaUDF { * returning the instance if it does. The lambda wrapper that Spark applies to Java UDFs will be * inspected if necessary to locate the user's UDF instance. */ - def getRapidsUDFInstance(function: AnyRef): Option[RapidsUDF] = { + def getRapidsUDFInstance[F: ClassTag](function: AnyRef): Option[F] = { function match { - case f: RapidsUDF => Some(f) + case f: F => Some(f) case f => try { // This may be a lambda that Spark's UDFRegistration wrapped around a Java UDF instance. @@ -632,7 +634,7 @@ object GpuScalaUDF { val serializedLambda = writeReplace.invoke(f).asInstanceOf[SerializedLambda] if (serializedLambda.getCapturedArgCount == 1) { serializedLambda.getCapturedArg(0) match { - case c: RapidsUDF => Some(c) + case c: F => Some(c) case _ => None } } else { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala new file mode 100644 index 00000000000..c8ca2c15754 --- /dev/null +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/udaf.scala @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.rapids.aggregate + +import ai.rapids.cudf.{ColumnVector, ColumnView, DType, GroupByAggregationOnColumn, Scalar} +import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation} +import com.nvidia.spark.rapids.{ExprChecks, ExprRule, GpuColumnVector, GpuExpression, GpuOverrides, GpuScalar, GpuUnsignedIntegerType, GpuUnsignedLongType, GpuUserDefinedFunction, ImperativeAggExprMeta, RepeatingParamCheck, TypedImperativeAggExprMeta, TypeSig} +import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} +import com.nvidia.spark.rapids.RapidsPluginImplicits.{AutoCloseableProducingArray, AutoCloseableProducingSeq} + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, SafeProjection, UnsafeProjection, UnsafeRow, UserDefinedExpression} +import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF} +import org.apache.spark.sql.rapids.GpuScalaUDF +import org.apache.spark.sql.types._ + +/** + * Co-work with a GpuAdvancedAggregateFunction to customize the aggregate computation. + */ +trait AdvancedCudfAggregate extends Serializable { + /** + * Do some optional pre-process before executing the "reduce" or "aggregateXXX". + * The output will be fed to "reduce" or "aggregateXXX". + * + * Now this is only called for the "merge" stage of an aggregate, it plays the + * role similar as "preMerge" in a GpuAggregateFunction. 
+ */ + def preStepAndClose(numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = args + // Similar as "reductionAggregate" in the CudfAggregate + def reduce(numRows: Int, preStepData: Array[GpuColumnVector]): Array[GpuScalar] + // Similar as "groupByAggregate" in the CudfAggregate + def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] + + /** + * If true, "aggregateAdvanced" will be executed instead of "aggregate" to get more + * control on the aggregate computation. Otherwise, "aggregate" is always called. + */ + def supportAdvanced: Boolean = false + + /** + * An advanced version of aggregate giving more control on the aggregate computation + * to perform custom aggregation on data that has been grouped by keys. + * The data is grouped, with offsets indicating group boundaries. + * + * @param keyOffsets A ColumnVector containing the start offset for each group. + * The end offset for group i is `keyOffsets[i+1]` (or total + * rows for the last group). + * @param groupedData An array of ColumnVectors containing the actual data + * columns, sorted and organized by the grouping keys. + * @return An array of ColumnVectors with one row per group, containing the + * aggregated results. + */ + def aggregateAdvanced( + keyOffsets: ColumnVector, + groupedData: Array[GpuColumnVector]): Array[GpuColumnVector] = { + throw new UnsupportedOperationException("Children should override this if " + + "setting 'supportAdvanced' to true") + } + + /** + * Do some optional post-process after executing the "reduce" or "aggregateXXX". + * The output will be return to Spark, so it should match the aggregate buffer schema. + * + * It plays the role similar as "postUpdate" or "postMerge" in a GpuAggregateFunction. 
+ */ + def postStepAndClose( + numRows: Int, + aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = aggregatedData +} + +/** + * An aggregation function that supports to customize the aggregate computations for + * almost all the core stages of the GPU hash aggregate process. + * + * This is designed for UDAF support on GPU, but it is not a good idea to put things named + * "xxxUDAFxxx" directly into the GpuHashAggregateExec. + */ +trait GpuAdvancedAggregateFunction extends GpuAggregateFunction with UserDefinedExpression + with Serializable { + // Similar as "initialValues" in the GpuAggregateFunction + def defaultValues: Array[GpuScalar] + // Similar as "inputProjection" in the GpuAggregateFunction + def preProcessAndClose(numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = { + args + } + // Similar as "updateAggregates" in the GpuAggregateFunction + def updateAggregate(): AdvancedCudfAggregate + // Similar as "mergeAggregates" in the GpuAggregateFunction + def mergeAggregate(): AdvancedCudfAggregate + // Similar as "evaluateExpression" in the GpuAggregateFunction + def postProcessAndClose(numRows: Int, args: Array[GpuColumnVector]): GpuColumnVector + + override final lazy val inputProjection: Seq[Expression] = children + + override final lazy val initialValues: Seq[Expression] = { + throw new UnsupportedOperationException("Gpu advanced aggregate function" + + " does not support 'initialValues', call 'defaultValues' instead.") + } + override final lazy val updateAggregates: Seq[CudfAggregate] = { + throw new UnsupportedOperationException("Gpu advanced aggregate function" + + " does not support 'updateAggregates', call 'updateAggregate' instead.") + } + override final lazy val mergeAggregates: Seq[CudfAggregate] = { + throw new UnsupportedOperationException("Gpu advanced aggregate function" + + " does not support 'mergeAggregates', call 'mergeAggregate' instead.") + } + override final lazy val evaluateExpression: Expression = { + 
throw new UnsupportedOperationException("Gpu advanced aggregate function" + + " does not support 'evaluateExpression', call 'postProcess' instead.") + } +} + +/** + * The wrapper of a RapidsUDAFGroupByAggregation to interact with the GPU hash + * aggregate process via GPU columns or scalars. + */ +private[aggregate] class UDAFCudfAggregate( + inputAggBufferTypes: Array[DataType], + udafAgg: RapidsUDAFGroupByAggregation) extends AdvancedCudfAggregate { + + // Type of UDAF check is done by initializing this field when constructing an instance. + override val supportAdvanced: Boolean = udafAgg match { + // "RapidsAdvancedGroupByAggregation => true" will be supported in the future. + case _: RapidsSimpleGroupByAggregation => false + case u => + throw new UnsupportedOperationException(s"${u.getClass} is NOT a child of " + + "'RapidsSimpleGroupByAggregation'") + } + + override def preStepAndClose( + numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = { + closeOnExcept(udafAgg.preStep(numRows, args.map(_.getBase))) { preCols => + val inputTypes = args.map(_.dataType()) + try { + // try the input types first + preCols.zip(inputTypes).map { case (cudfCol, dt) => + GpuColumnVector.fromChecked(cudfCol, dt) + } + } catch { + case _: IllegalArgumentException => + // some transformation is made, so infer the types from the outputs + preCols.map { cudfCol => + GpuColumnVector.fromChecked(cudfCol, AdvAggTypeUtils.infer(cudfCol)) + } + } + } + } + + override def reduce(numRows: Int, preStepData: Array[GpuColumnVector]): Array[GpuScalar] = { + closeOnExcept(udafAgg.reduce(numRows, preStepData.map(_.getBase))) { reducedRet => + reducedRet.safeMap { cuScalar => + GpuScalar(cuScalar, AdvAggTypeUtils.infer(cuScalar)) + } + } + } + + override def aggregateAdvanced( + keyOffsets: ColumnVector, + groupedData: Array[GpuColumnVector]): Array[GpuColumnVector] = { + // Should not come here, just in case + throw new 
UnsupportedOperationException("`RapidsAdvancedGroupByAggregation`" + + " is not supported yet") + } + + override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = { + udafAgg.asInstanceOf[RapidsSimpleGroupByAggregation].aggregate(inputIndices) + } + + override def postStepAndClose( + numRows: Int, + aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = { + closeOnExcept(udafAgg.postStep(aggregatedData.map(_.getBase))) { postCols => + require(postCols.length == inputAggBufferTypes.length, + "The sizes of the 'postStep' and 'aggregationBufferTypes' outputs does " + + s"not match. Sizes: ${postCols.length} vs ${inputAggBufferTypes.length}") + try { + postCols.zip(inputAggBufferTypes).map { case (cudfCol, dt) => + GpuColumnVector.fromChecked(cudfCol, dt) + } + } catch { + case iae: IllegalArgumentException => + throw new RuntimeException("The output of the 'postStep' does not match " + + "the given aggregate buffer types", iae) + } + } + } +} + +object AdvAggTypeUtils { + /** + * Infer the Spark type from the given cuDF ColumnView. + * + * This returned Spark type can not be used to interact with the Spark world, but + * only for the GPU process internally when asking for a ColumnarBatch without given + * Spark type. Because it may not always reflect the + * original Spark type. e.g. + * A List of Struct column in cuDF may be either from MapType or the real List + * of Struct type in Spark. + * A INT32 column in cuDF may be from either YearMonthIntervalType or IntegerType + * in Spark. + * + * It is designed for the "preStep" and "reduce/aggregate" operations in our GPU + * advanced aggregates. 
+ */ + def infer(col: ColumnView): DataType = col.getType match { + case DType.LIST => + val childType = withResource(col.getChildColumnView(0))(infer) + ArrayType(childType, col.getNullCount > 0) + case DType.STRUCT => + val fields = (0 until col.getNumChildren).map { i => + withResource(col.getChildColumnView(i)) { chdView => + val chdType = infer(chdView) + StructField(s"_cudf_${chdView.getType}_$i", chdType, chdView.getNullCount > 0) + } + } + StructType(fields) + case nonNested => fromNonNested(nonNested) + } + + /** + * Infer the Spark type from the given cuDF Scalar, similar as infer(ColumnView). + */ + def infer(scalar: Scalar): DataType = scalar.getType match { + case DType.LIST => + val childType = withResource(scalar.getListAsColumnView)(infer) + ArrayType(childType, !scalar.isValid) + case DType.STRUCT => + val fields = withResource(scalar.getChildrenFromStructScalar) { childrenViews => + childrenViews.zipWithIndex.map { case (chdView, i) => + val chdType = infer(chdView) + StructField(s"_cudf_${chdView.getType}_$i", chdType, chdView.getNullCount > 0) + } + } + StructType(fields) + case nonNested => fromNonNested(nonNested) + } + + private def fromNonNested(dType: DType): DataType = dType match { + case DType.BOOL8 => BooleanType + case DType.INT8 => ByteType + case DType.INT16 => ShortType + case DType.INT32 => IntegerType + case DType.INT64 => LongType + case DType.FLOAT32 => FloatType + case DType.FLOAT64 => DoubleType + case DType.TIMESTAMP_DAYS => DateType + case DType.TIMESTAMP_MICROSECONDS => TimestampType + case DType.STRING => StringType + case DType.UINT32 => GpuUnsignedIntegerType + case DType.UINT64 => GpuUnsignedLongType + case dType if dType.isDecimalType => + val precision = dType.getTypeId match { + case DType.DTypeEnum.DECIMAL32 => 9 + case DType.DTypeEnum.DECIMAL64 => 18 + case DType.DTypeEnum.DECIMAL128 => 38 + case _ => throw new IllegalArgumentException(s"Unsupported decimal type: $dType") + } + DecimalType(precision, 
-dType.getScale) + case _ => throw new IllegalArgumentException(s"Unsupported DType: $dType") + } + + /** + * Extract the children columns from the given struct column. These columns + * should be closed when no longer needed. + * The behavior is undefined if a non-struct column is specified. + */ + def extractChildren(structCol: GpuColumnVector): Array[GpuColumnVector] = { + val dt = structCol.dataType().asInstanceOf[StructType] + val baseCol = structCol.getBase + (0 until baseCol.getNumChildren).safeMap { i => + withResource(baseCol.getChildColumnView(i)) { childView => + GpuColumnVector.from(childView.copyToColumnVector(), dt(i).dataType) + } + }.toArray + } + + /** + * Create an attribute of struct type from the given types. + */ + def attrFromTypes( + name: String, + aggBufTypes: Array[DataType]): AttributeReference = { + val aggType = StructType(aggBufTypes.zipWithIndex.map { case (dt, id) => + StructField(s"_${name}_child$id", dt) + }) + AttributeReference(s"${name}_buf", aggType)() + } +} + +/** Common implementation for all the types of GPU UDAF interface. 
*/ +trait GpuUDAFFunctionBase extends GpuAdvancedAggregateFunction + with UserDefinedExpression { + + /** User's UDAF instance */ + protected def function: RapidsUDAF + + protected lazy val aggBufferTypes: Array[DataType] = function.aggBufferTypes() + + override def defaultValues: Array[GpuScalar] = { + closeOnExcept(function.getDefaultValue) { udafDefValues => + require(udafDefValues.length == aggBufferTypes.length, + s"The default values number (${udafDefValues.length}) is NOT equal to " + + s"the aggregation buffers number(${aggBufferTypes.length})") + udafDefValues.zip(aggBufferTypes).map { case (scalar, dt) => + GpuScalar(scalar, dt) + } + } + } + + override def preProcessAndClose( + numRows: Int, args: Array[GpuColumnVector]): Array[GpuColumnVector] = { + closeOnExcept(function.preProcess(numRows, args.map(_.getBase))) { preCols => + val inputTypes = args.map(_.dataType()) + try { + // try the input types first + preCols.zip(inputTypes).map { case (cudfCol, dt) => + GpuColumnVector.fromChecked(cudfCol, dt) + } + } catch { + case _: IllegalArgumentException => + // some transformation is made, so infer the types from the outputs + preCols.map { cudfCol => + GpuColumnVector.fromChecked(cudfCol, AdvAggTypeUtils.infer(cudfCol)) + } + } + } + } + + override def postProcessAndClose( + numRows: Int, + args: Array[GpuColumnVector]): GpuColumnVector = { + closeOnExcept(function.postProcess(numRows, args.map(_.getBase), dataType)) { postCol => + try { + GpuColumnVector.fromChecked(postCol, dataType) + } catch { + case iae: IllegalArgumentException => + throw new RuntimeException("The output of the 'postProcess' does not match " + + "the UDAF result type", iae) + } + } + } + + override def updateAggregate(): AdvancedCudfAggregate = { + new UDAFCudfAggregate(aggBufferTypes, function.updateAggregation()) + } + + override def mergeAggregate(): AdvancedCudfAggregate = { + // merge will leverage the "preStepAndClose" method of the AdvancedCudfAggregate, + // so it is constructed 
exactly like the update one (no extra argument). + new UDAFCudfAggregate(aggBufferTypes, function.mergeAggregation()) + } +} + +case class GpuScalaUDAF( + function: RapidsUDAF, + dataType: DataType, + children: Seq[Expression], + udafName: Option[String], + nullable: Boolean) extends GpuUDAFFunctionBase { + + override val name: String = udafName.getOrElse(function.getClass.getSimpleName) + + override lazy val aggBufferAttributes: Seq[AttributeReference] = + aggBufferTypes.zipWithIndex.map { case (dt, id) => + AttributeReference(s"${name}_$id", dt)() + } +} + +/** + * Works together with GpuTypedUDAFFunctionBase to support the process of the + * aggregate buffer for TypedImperativeAggregate in Spark. + */ +private[aggregate] class TypeUDAFCudfAggregate( + aggBufferAttr: AttributeReference, + inputAggBufferTypes: Array[DataType], + udafAgg: RapidsUDAFGroupByAggregation +) extends UDAFCudfAggregate(inputAggBufferTypes, udafAgg) { + override def preStepAndClose(numRows: Int, + args: Array[GpuColumnVector]): Array[GpuColumnVector] = { + require((args.length == 1) && args.head.dataType().isInstanceOf[StructType], + "preStep expects only one struct column as the input") + val children = withResource(args.head)(AdvAggTypeUtils.extractChildren) + super.preStepAndClose(numRows, children) + } + + override def postStepAndClose(numRows: Int, + aggregatedData: Array[GpuColumnVector]): Array[GpuColumnVector] = { + withResource(super.postStepAndClose(numRows, aggregatedData)) { ret => + val cudfCol = ColumnVector.makeStruct(numRows.toLong, ret.map(_.getBase): _*) + Array(GpuColumnVector.from(cudfCol, aggBufferAttr.dataType)) + } + } +} + +/** + * Aggregate function that leverages a single struct type buffer as the aggregate + * buffer, to match the Spark expectation for a TypedImperativeAggregate that is + * using a single aggregate buffer, e.g. 
+ */
+trait GpuTypedUDAFFunctionBase extends GpuUDAFFunctionBase {
+
+  override lazy val aggBufferAttributes: Seq[AttributeReference] = {
+    // Spark expects a single aggregate buffer, so the GPU has to build a
+    // single struct type with the buffer types as its children.
+    Seq(AdvAggTypeUtils.attrFromTypes(name, aggBufferTypes))
+  }
+
+  /**
+   * Packs the per-field default scalars of the parent into one struct scalar whose
+   * type is the single buffer attribute's type.
+   */
+  override def defaultValues: Array[GpuScalar] = {
+    // Each parent scalar becomes a one-row column; the parent scalars are closed as
+    // soon as the columns exist.
+    val childrenCols = withResource(super.defaultValues) { defValues =>
+      defValues.safeMap(s => ColumnVector.fromScalar(s.getBase, 1))
+    }
+    // The temporary columns are closed once the struct scalar has been built.
+    val structScalar = withResource(childrenCols) { _ =>
+      Scalar.structFromColumnViews(childrenCols: _*)
+    }
+    Array(GpuScalar(structScalar, aggBufferAttributes.head.dataType))
+  }
+
+  /** Wraps the user's update aggregation so it works on the single struct buffer. */
+  override def updateAggregate(): AdvancedCudfAggregate = {
+    new TypeUDAFCudfAggregate(aggBufferAttributes.head, aggBufferTypes,
+      function.updateAggregation())
+  }
+
+  /** Wraps the user's merge aggregation so it works on the single struct buffer. */
+  override def mergeAggregate(): AdvancedCudfAggregate = {
+    new TypeUDAFCudfAggregate(aggBufferAttributes.head, aggBufferTypes,
+      function.mergeAggregation())
+  }
+
+  /**
+   * Extracts the children of the single struct buffer column and hands them to the
+   * parent's post-processing. All columns in "args" are closed by this call, including
+   * when the input validation fails.
+   *
+   * @param numRows the number of rows, passed through to the parent
+   * @param args exactly one struct column holding the aggregate buffer
+   * @throws IllegalArgumentException if "args" is not a single struct column
+   */
+  override def postProcessAndClose(numRows: Int,
+      args: Array[GpuColumnVector]): GpuColumnVector = {
+    // Take ownership of "args" before validating it, otherwise a failed check would
+    // leak the input columns.
+    val children = withResource(args) { _ =>
+      require((args.length == 1) && args.head.dataType().isInstanceOf[StructType],
+        "postProcess expects only one struct column as the input")
+      AdvAggTypeUtils.extractChildren(args.head)
+    }
+    super.postProcessAndClose(numRows, children)
+  }
+}
+
+/**
+ * GPU expression for a Spark ScalaAggregator whose aggregator also implements RapidsUDAF.
+ *
+ * @param function the GPU implementation provided by the user
+ * @param children the input expressions
+ * @param dataType the result type
+ * @param nullable whether the result can be null
+ * @param aggregatorName the optional name; the simple class name of "function" is used
+ *                       when it is empty
+ */
+case class GpuScalaAggregator(
+    function: RapidsUDAF,
+    children: Seq[Expression],
+    dataType: DataType,
+    nullable: Boolean,
+    aggregatorName: Option[String]) extends GpuTypedUDAFFunctionBase {
+
+  override val name: String = aggregatorName.getOrElse(function.getClass.getSimpleName)
+}
+
+/**
+ * Converts a binary aggregate buffer into a row of "gpuType": the input bytes are
+ * read as an UnsafeRow with one field per child of the struct and then run through a
+ * SafeProjection.
+ *
+ * @param child the expression producing the binary buffer
+ * @param gpuType the struct type of the buffer on the GPU side
+ */
+case class C2gUDAFBufferTransition(
+    child: Expression,
+    gpuType: DataType) extends CpuToGpuBufferTransition {
+  override val dataType: DataType = gpuType
+
+  private lazy val childrenTypes = gpuType.asInstanceOf[StructType].map(_.dataType)
+  // NOTE(review): "row" is re-pointed on every call, so the projected result is
+  // presumably consumed before the next call - confirm against the caller.
+  private lazy val row = new UnsafeRow(childrenTypes.length)
+  private lazy val objectProj = SafeProjection.create(childrenTypes.toArray)
+
+  override protected def nullSafeEval(input: Any): InternalRow = {
+    val bytes = input.asInstanceOf[Array[Byte]]
+    row.pointTo(bytes, bytes.length)
+    objectProj(row)
+  }
+}
+
+/**
+ * Converts a struct aggregate buffer into bytes: the input row is projected to an
+ * UnsafeRow and its backing bytes are returned.
+ *
+ * @param child the expression producing the struct buffer
+ */
+case class G2cUDAFBufferTransition(child: Expression) extends GpuToCpuBufferTransition {
+  private lazy val unsafeProj = UnsafeProjection.create(
+    child.dataType.asInstanceOf[StructType].map(_.dataType).toArray
+  )
+
+  override protected def nullSafeEval(input: Any): Array[Byte] = {
+    unsafeProj(input.asInstanceOf[InternalRow]).getBytes
+  }
+}
+
+/** Replacement rules for ScalaUDAF and ScalaAggregator. */
+object GpuUDAFMeta {
+  /**
+   * Rule for ScalaUDAF. The expression is tagged as not supported on the GPU unless a
+   * RapidsUDAF instance can be obtained from the wrapped UDAF.
+   */
+  def scalaUDAFMeta: ExprRule[ScalaUDAF] = GpuOverrides.expr[ScalaUDAF](
+    "User Defined Aggregate Function, the UDAF can choose to implement a RAPIDS" +
+      " accelerated interface to get better performance.",
+    ExprChecks.reductionAndGroupByAgg(
+      GpuUserDefinedFunction.udfTypeSig,
+      TypeSig.all,
+      repeatingParamCheck =
+        Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))),
+    (sUdaf, conf, p, r) => new ImperativeAggExprMeta(sUdaf, conf, p, r) {
+      private val opRapidsUDAF = GpuScalaUDF.getRapidsUDFInstance[RapidsUDAF](sUdaf.udaf)
+
+      override def tagAggForGpu(): Unit = {
+        if (opRapidsUDAF.isEmpty) {
+          val udfClass = sUdaf.udaf.getClass
+          willNotWorkOnGpu(s"${sUdaf.name} implemented by $udfClass does not " +
+            s"provide a GPU implementation")
+        }
+      }
+
+      override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+        require(opRapidsUDAF.isDefined)
+        GpuScalaUDAF(
+          opRapidsUDAF.get,
+          sUdaf.dataType,
+          childExprs,
+          sUdaf.udafName,
+          sUdaf.nullable)
+      }
+    }
+  )
+
+  /**
+   * Rule for ScalaAggregator. Same GPU eligibility check as the ScalaUDAF rule, plus
+   * the converters between the binary buffer and the struct buffer.
+   */
+  def scalaAggregatorMeta[IN, BUF, OUT]: ExprRule[ScalaAggregator[IN, BUF, OUT]] =
+    GpuOverrides.expr[ScalaAggregator[IN, BUF, OUT]](
+      "User Defined Aggregator, it can choose to implement a RAPIDS" +
+        " accelerated interface to get better performance.",
+      ExprChecks.reductionAndGroupByAgg(
+        GpuUserDefinedFunction.udfTypeSig,
+        TypeSig.all,
+        repeatingParamCheck =
+          Some(RepeatingParamCheck("param", GpuUserDefinedFunction.udfTypeSig, TypeSig.all))),
+      (sAgg, conf, p, r) => new TypedImperativeAggExprMeta(sAgg, conf, p, r) {
+        private val opRapidsUDAF = GpuScalaUDF.getRapidsUDFInstance[RapidsUDAF](sAgg.agg)
+
+        override def tagAggForGpu(): Unit = {
+          if (opRapidsUDAF.isEmpty) {
+            val udfClass = sAgg.agg.getClass
+            willNotWorkOnGpu(s"${sAgg.name} implemented by $udfClass does not " +
+              s"provide a GPU implementation")
+          }
+        }
+
+        // The struct attribute built from the RapidsUDAF buffer types when a GPU
+        // implementation exists, otherwise the buffer attribute of the CPU expression.
+        override def aggBufferAttribute: AttributeReference = {
+          opRapidsUDAF.map { rapidsUDAF =>
+            AdvAggTypeUtils.attrFromTypes(sAgg.name, rapidsUDAF.aggBufferTypes())
+          }.getOrElse(
+            // opRapidsUDAF is None, so it will fallback to CPU, use the CPU one.
+            sAgg.aggBufferAttributes.head
+          )
+        }
+
+        override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+          require(opRapidsUDAF.isDefined)
+          GpuScalaAggregator(
+            opRapidsUDAF.get,
+            childExprs,
+            sAgg.dataType,
+            sAgg.nullable,
+            sAgg.aggregatorName)
+        }
+
+        override val supportBufferConversion: Boolean = true
+
+        override def createCpuToGpuBufferConverter(): CpuToGpuAggregateBufferConverter = {
+          (child: Expression) => C2gUDAFBufferTransition(child, aggBufferAttribute.dataType)
+        }
+
+        override def createGpuToCpuBufferConverter(): GpuToCpuAggregateBufferConverter = {
+          (child: Expression) => G2cUDAFBufferTransition(child)
+        }
+      }
+    )
+}
diff --git a/tests/src/test/resources/group_strings_ints_ints.csv b/tests/src/test/resources/group_strings_ints_ints.csv
new file mode 100644
index 00000000000..332cceb096c
--- /dev/null
+++ b/tests/src/test/resources/group_strings_ints_ints.csv
@@ -0,0 +1,50 @@
+"group1",1,141
+"group2",2,139
+"group3",3,138
+"group4",4,137
+"group5",5,136
+"group1",6,135
+"group2",7,134
+"group3",8,132
+"group4",9,131
+"group5",10,130
+"group1",11,129
+"group2",12,128
+"group3",13,127
+"group4",14,126
+"group5",15,125
+"group1",16,124
+"group2",17,123
+"group3",18,121 +"group4",19,119 +"group5",21,118 +"group1",22,117 +"group2",23,116 +"group3",23,115 +"group4",50,114 +"group5",49,113 +"group1",48,112 +"group2",47,112 +"group3",46,111 +"group4",45,110 +"group5",44,109 +"group1",43,108 +"group2",42,107 +"group3",41,106 +"group4",39,105 +"group5",38,104 +"group1",37,104 +"group2",36,104 +"group3",35,103 +"group4",34,102 +"group5",33,101 +"group1",32,199 +"group2",31,188 +"group3",30,177 +"group4",29,166 +"group5",28,155 +"group1",27,144 +"group2",26,133 +"group3",25,122 +"group4",24,111 +"group5",60,100 \ No newline at end of file diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala index 82fb1dd4154..fe44bbde83c 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/HashAggregateRetrySuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -71,7 +71,7 @@ class HashAggregateRetrySuite
 
     // attempt a cuDF reduction
     GpuAggregateIterator.aggregate(
-      aggHelper, input, mockMetrics)
+      aggHelper, input, Seq.empty, mockMetrics)._1
   }
 
   def makeGroupByAggHelper(forceMerge: Boolean): AggHelper = {
@@ -111,7 +111,8 @@ class HashAggregateRetrySuite
     GpuAggregateIterator.aggregate(
       makeGroupByAggHelper(forceMerge = false),
       input,
-      mockMetrics)
+      Seq.empty,
+      mockMetrics)._1
   }
 
   test("computeAndAggregate reduction with retry") {
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala
new file mode 100644
index 00000000000..206303c21c8
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaAggregatorSuite.scala
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+import ai.rapids.cudf.{ColumnVector, DType, GroupByAggregation, GroupByAggregationOnColumn, Scalar}
+import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation}
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{functions, Encoder, Encoders}
+import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
+
+/**
+ * Smoke tests for a typed Aggregator that also implements RapidsUDAF. The cases cover
+ * a group-by, a reduction, a reduction over an empty input, and a group-by where
+ * "spark.rapids.sql.hashAgg.replaceMode" is set to "partial" or "final".
+ */
+class ScalaAggregatorSuite extends SparkQueryCompareTestSuite {
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Groupby with ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv, repart = 7) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    df.createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText = """
+      SELECT count(c1_int), intAverage(c1_int), max(c2_int), intAverage(c2_int)
+      FROM groupby_scala_average_udaf_test_table
+      GROUP BY key_str
+    """)
+  }
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Reduction with ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv, repart = 7) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText = """
+      SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+
+  // Same columns as the CSV fixture, used to build the empty input below.
+  private val emptyDfSchema = StructType(Seq(
+    StructField("key_str", StringType, nullable = true),
+    StructField("c1_int", IntegerType, nullable = true),
+    StructField("c2_int", IntegerType, nullable = true))
+  )
+
+  IGNORE_ORDER_testSparkResultsAreEqual(
+      testName = "Reduction with ScalaAggregator Average on empty dataset",
+      ss => emptyRowsDf(ss, emptyDfSchema)) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+    df.sparkSession.sql(sqlText =
+      """
+        SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+        FROM reduction_scala_average_udaf_test_table
+      """)
+  }
+
+  // One test per replace mode. "partial" is labelled "Gpu2Cpu" and "final" is labelled
+  // "Cpu2Gpu"; presumably only that aggregation stage runs on the GPU, so the aggregate
+  // buffer has to cross between GPU and CPU - TODO confirm against the replaceMode docs.
+  Seq("partial", "final").foreach { replaceMode =>
+    val fallType = if (replaceMode == "partial") "Gpu2Cpu" else "Cpu2Gpu"
+    IGNORE_ORDER_ALLOW_NON_GPU_testSparkResultsAreEqual(
+      testName = s"Groupby with $fallType ScalaAggregator Average",
+      groupbyStringsIntsIntsFromCsv,
+      repart = 7,
+      execsAllowedNonGpu = Seq("ObjectHashAggregateExec", "ProjectExec"),
+      conf = new SparkConf().set("spark.rapids.sql.hashAgg.replaceMode", replaceMode)
+    ) { df =>
+      // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+      // the specific UDAF implementation itself.
+      df.createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+      df.sparkSession.udf.register("intAverage", functions.udaf(new IntAverageAggregator))
+      df.sparkSession.sql(sqlText =
+        """
+          SELECT count(c1_int), intAverage(c1_int), max(c2_int), intAverage(c2_int)
+          FROM groupby_scala_average_udaf_test_table
+          GROUP BY key_str
+        """)
+    }
+  }
+}
+
+/**
+ * CPU-side aggregation buffer: the running sum (null until a value is added) and the
+ * number of non-null inputs.
+ */
+case class AverageBuffer(var sum: java.lang.Long, var count: Long)
+
+/**
+ * Integer average of integer inputs, implemented twice: as a Spark Aggregator for the
+ * CPU and as a RapidsUDAF for the GPU. Both use a [sum, count] buffer of two longs.
+ */
+class IntAverageAggregator extends Aggregator[Integer, AverageBuffer, Integer] with RapidsUDAF {
+
+  // ===== CPU Spark Aggregator Implementation =====
+  // A zero value for this aggregation. Should satisfy the property that any b + zero = b
+  override def zero: AverageBuffer = AverageBuffer(null, 0L)
+
+  // Combine two values to produce a new value. For performance, the function may
+  // modify `buffer` and return it instead of constructing a new object
+  override def reduce(buffer: AverageBuffer, data: Integer): AverageBuffer = {
+    if (data != null) {
+      // NOTE(review): "buffer.sum" is null for a fresh buffer (see "zero"), so this
+      // relies on how Scala unboxes a null java.lang.Long - confirm it yields 0 rather
+      // than a NullPointerException on every Scala version this is built with.
+      buffer.sum += data
+      buffer.count += 1
+    }
+    buffer
+  }
+
+  // Merge two intermediate values
+  override def merge(b1: AverageBuffer, b2: AverageBuffer): AverageBuffer = {
+    // A null "b2.sum" contributes nothing. "b1.sum" is not null-checked, so the same
+    // unboxing note as in "reduce" applies.
+    if (b2.sum != null) {
+      b1.sum += b2.sum
+    }
+    b1.count += b2.count
+    b1
+  }
+
+  // Transform the output of the reduction/aggregation
+  override def finish(reduction: AverageBuffer): Integer = {
+    // toInt is safe since no overflows here
+    if (reduction.count == 0) null else (reduction.sum / reduction.count).toInt
+  }
+
+  // Specifies the Encoder for the intermediate value type
+  override def bufferEncoder: Encoder[AverageBuffer] = Encoders.product
+  // Specifies the Encoder for the final output value type
+  override def outputEncoder: Encoder[Integer] = Encoders.INT
+
+  // ===== GPU RapidsUDAF Implementation =====
+  override def getDefaultValue: Array[Scalar] = {
+    // Return default values for [sum, count] - these need to match the output of
+    // "updateAggregation" and also ideally match the output of initialize in the
+    // CPU version.
+    // Make sure that if we get an exception we do not leak memory
+    closeOnExcept(Scalar.fromNull(DType.INT64)) { nullScalar =>
+      Array(
+        nullScalar, // null sum (Long)
+        Scalar.fromLong(0L) // 0 count (Long)
+      )
+    }
+  }
+
+  // Takes the single input column, closes it, and returns it cast to INT64.
+  override def preProcess(numRows: Int, args: Array[ColumnVector]): Array[ColumnVector] = {
+    require(args.length == 1)
+    withResource(args.head) { intArg =>
+      Array(intArg.castTo(DType.INT64)) // Cast int to long to avoid potential overflow
+    }
+  }
+
+  override def postProcess(numRows: Int, args: Array[ColumnVector],
+      outType: DataType): ColumnVector = {
+    // Final step: divide sum by count to get average. Perform element-wise
+    // division: sum / count.
+    // Note that if the COUNT is 0 the SUM is null.
+    // This is to close the input "args" to avoid GPU memory leak.
+    val averageCol = withResource(args) { _ =>
+      val sumCol = args(0)
+      val countCol = args(1)
+      sumCol.div(countCol)
+    }
+    withResource(averageCol) { averageCol =>
+      // Cast to integers, no overflows here.
+      averageCol.castTo(DType.INT32)
+    }
+  }
+
+  // Buffer layout on the GPU: [sum, count], both longs.
+  override def aggBufferTypes(): Array[DataType] = Array(LongType, LongType)
+
+  override def updateAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // For reduction (no group-by keys), compute SUM and COUNT directly
+        val inputCol = preStepData(0)
+        // Make sure that we don't leak if there is an exception
+        closeOnExcept(inputCol.sum()) { sum =>
+          val count = Scalar.fromLong(inputCol.getRowCount - inputCol.getNullCount)
+          Array(sum, count)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // For group-by aggregation, create SUM and COUNT operations
+        val colIndex = inputIndices(0)
+        Array(
+          GroupByAggregation.sum().onColumn(colIndex),
+          GroupByAggregation.count().onColumn(colIndex)
+        )
+      }
+
+      override def postStep(aggregatedData: Array[ColumnVector]): Array[ColumnVector] = {
+        // cudf count() aggregate produces an integer column, so convert it to
+        // Long to match the agg buffer type.
+        require(aggregatedData.length == 2, "Expect two columns for postStep during update")
+        // The inputs are closed on exit, so the sum column gets an extra reference to
+        // be returned, while the count is returned as a newly cast column.
+        withResource(aggregatedData) { _ =>
+          Array(aggregatedData.head.incRefCount(), aggregatedData(1).castTo(DType.INT64))
+        }
+      }
+    }
+  }
+
+  override def mergeAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // Merge by summing both sum and count columns
+        val sumCol = preStepData(0)
+        val countCol = preStepData(1)
+
+        // Avoid leaks even if there is an exception when merging countCol
+        closeOnExcept(sumCol.sum()) { mergedSum =>
+          val mergedCount = countCol.sum()
+          Array(mergedSum, mergedCount)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // Merge by summing both columns
+        Array(
+          GroupByAggregation.sum().onColumn(inputIndices(0)), // sum of sums
+          GroupByAggregation.sum().onColumn(inputIndices(1)) // sum of counts
+        )
+      }
+
+      // "postStep" uses default implementation (pass-through)
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala
new file mode 100644
index 00000000000..2ae24c0fd4e
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/ScalaUDAFSuite.scala
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+import ai.rapids.cudf.{ColumnVector, DType, GroupByAggregation, GroupByAggregationOnColumn, Scalar}
+import com.nvidia.spark.{RapidsSimpleGroupByAggregation, RapidsUDAF, RapidsUDAFGroupByAggregation}
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
+
+/**
+ * Smoke tests for a UserDefinedAggregateFunction that also implements RapidsUDAF. The
+ * cases cover a group-by, a reduction, and a reduction over an empty input.
+ */
+@scala.annotation.nowarn("msg= is deprecated")
+class ScalaUDAFSuite extends SparkQueryCompareTestSuite {
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Groupby with ScalaUDAF Average",
+      groupbyStringsIntsIntsFromCsv) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    // "repartition(7)" is to avoid the Complete mode of the aggregate.
+    df.repartition(7).createOrReplaceTempView("groupby_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText = """
+      SELECT count(c1_int), intAverage(c1_int), max(c2_int), count(c2_int),
+        intAverage(c2_int), intAverage(c2_int + 1)
+      FROM groupby_scala_average_udaf_test_table
+      GROUP BY key_str
+    """)
+  }
+
+  IGNORE_ORDER_testSparkResultsAreEqual(testName = "Reduction with ScalaUDAF Average",
+      groupbyStringsIntsIntsFromCsv) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    // "repartition(7)" is to avoid the Complete mode of the aggregate.
+    df.repartition(7).createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText = """
+      SELECT intAverage(c1_int), count(c1_int), max(c1_int), intAverage(c2_int),
+        intAverage(c2_int + 1), max(c2_int)
+      FROM reduction_scala_average_udaf_test_table
+    """)
+  }
+
+  // Same columns as the CSV fixture, used to build the empty input below.
+  private val emptyDfSchema = StructType(Seq(
+    StructField("key_str", StringType, nullable = true),
+    StructField("c1_int", IntegerType, nullable = true),
+    StructField("c2_int", IntegerType, nullable = true))
+  )
+
+  IGNORE_ORDER_testSparkResultsAreEqual(
+      testName = "Reduction with ScalaUDAF Average on empty dataset",
+      ss => emptyRowsDf(ss, emptyDfSchema)) { df =>
+    // This is a basic smoke-test of the Scala UDAF framework, not an exhaustive test of
+    // the specific UDAF implementation itself.
+    df.createOrReplaceTempView("reduction_scala_average_udaf_test_table")
+    df.sparkSession.udf.register("intAverage", new IntAverageUDAF)
+    df.sparkSession.sql(sqlText =
+      """
+        SELECT intAverage(c1_int), count(c1_int), intAverage(c2_int), max(c2_int)
+        FROM reduction_scala_average_udaf_test_table
+      """)
+  }
+}
+
+/**
+ * Integer average of integer inputs, implemented twice: as a Spark
+ * UserDefinedAggregateFunction for the CPU and as a RapidsUDAF for the GPU. Both use a
+ * [sum, count] buffer of two longs, where the sum stays null until a value is added.
+ */
+@scala.annotation.nowarn("msg= is deprecated")
+class IntAverageUDAF extends UserDefinedAggregateFunction with RapidsUDAF {
+
+  // ===== CPU Spark UDAF Implementation =====
+  override def inputSchema: StructType = StructType(Seq(StructField("intValue", IntegerType)))
+
+  override def bufferSchema: StructType = StructType(Seq(
+    StructField("sum", LongType),
+    StructField("count", LongType)
+  ))
+
+  override def dataType: DataType = IntegerType
+
+  override def deterministic: Boolean = true
+
+  override def initialize(buffer: MutableAggregationBuffer): Unit = {
+    buffer.update(0, null) // sum
+    buffer.update(1, 0L) // count
+  }
+
+  // Null inputs are skipped. The first non-null input replaces the null sum.
+  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+    if (!input.isNullAt(0)) {
+      buffer(0) = if(buffer.isNullAt(0)) {
+        input.getInt(0).toLong
+      } else {
+        buffer.getLong(0) + input.getInt(0)
+      } // sum
+      buffer(1) = buffer.getLong(1) + 1L // count
+    }
+  }
+
+  // Sums are added only when both sides are non-null, a null side is ignored. The
+  // counts are always added.
+  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+    if (buffer1.isNullAt(0) && !buffer2.isNullAt(0)) {
+      buffer1(0) = buffer2.getLong(0)
+    } else if (!buffer1.isNullAt(0) && !buffer2.isNullAt(0)) {
+      buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0) // sum
+    } else {
+      // NOOP buffer2(0) is null so buffer1 holds the correct value already
+    }
+    buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1) // count
+  }
+
+  // Returns null when nothing was counted, otherwise the truncated long division.
+  override def evaluate(buffer: Row): Any = {
+    val count = buffer.getLong(1)
+    // toInt is safe since no overflows here
+    if (count == 0) null else (buffer.getLong(0) / count).toInt
+  }
+
+  // ===== GPU RapidsUDAF Implementation =====
+  override def getDefaultValue: Array[Scalar] = {
+    // Return default values for [sum, count] - these need to match the output of
+    // "updateAggregation" and also ideally match the output of initialize in the
+    // CPU version.
+    // Make sure that if we get an exception we do not leak memory
+    closeOnExcept(Scalar.fromNull(DType.INT64)) { nullScalar =>
+      Array(
+        nullScalar, // null sum (Long)
+        Scalar.fromLong(0L) // 0 count (Long)
+      )
+    }
+  }
+
+  // Takes the single input column, closes it, and returns it cast to INT64.
+  override def preProcess(numRows: Int, args: Array[ColumnVector]): Array[ColumnVector] = {
+    require(args.length == 1)
+    withResource(args.head) { intArg =>
+      Array(intArg.castTo(DType.INT64)) // Cast int to long to avoid potential overflow
+    }
+  }
+
+  override def postProcess(numRows: Int, args: Array[ColumnVector],
+      outType: DataType): ColumnVector = {
+    // Final step: divide sum by count to get average. Perform element-wise
+    // division: sum / count.
+    // Note that if the COUNT is 0 the SUM is null.
+    // The input "args" are closed once the division is done.
+    val averageCol = withResource(args) { _ =>
+      val sumCol = args(0)
+      val countCol = args(1)
+      sumCol.div(countCol)
+    }
+    withResource(averageCol) { averageCol =>
+      // Cast to integers, no overflows here.
+      averageCol.castTo(DType.INT32)
+    }
+  }
+
+  // Reuses the CPU buffer schema, so the GPU buffer is [sum, count] as two longs.
+  override def aggBufferTypes(): Array[DataType] = bufferSchema.map(_.dataType).toArray
+
+  override def updateAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // For reduction (no group-by keys), compute SUM and COUNT directly
+        val inputCol = preStepData(0)
+        // Make sure that we don't leak if there is an exception
+        closeOnExcept(inputCol.sum()) { sum =>
+          val count = Scalar.fromLong(inputCol.getRowCount - inputCol.getNullCount)
+          Array(sum, count)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // For group-by aggregation, create SUM and COUNT operations
+        val colIndex = inputIndices(0)
+        Array(
+          GroupByAggregation.sum().onColumn(colIndex),
+          GroupByAggregation.count().onColumn(colIndex)
+        )
+      }
+
+      override def postStep(aggregatedData: Array[ColumnVector]): Array[ColumnVector] = {
+        // cudf count() aggregate produces an integer column, so convert it to Long
+        // to match the agg buffer type.
+        require(aggregatedData.length == 2, "Expect two columns for postStep during update")
+        // The inputs are closed on exit, so the sum column gets an extra reference to
+        // be returned, while the count is returned as a newly cast column.
+        withResource(aggregatedData) { _ =>
+          // sum, count
+          Array(aggregatedData.head.incRefCount(), aggregatedData(1).castTo(DType.INT64))
+        }
+      }
+    }
+  }
+
+  override def mergeAggregation(): RapidsUDAFGroupByAggregation = {
+    new RapidsSimpleGroupByAggregation() {
+      // "preStep" uses default implementation (pass-through)
+
+      override def reduce(numRows: Int, preStepData: Array[ColumnVector]): Array[Scalar] = {
+        // Merge by summing both sum and count columns
+        val sumCol = preStepData(0)
+        val countCol = preStepData(1)
+
+        // Avoid leaks even if there is an exception when merging countCol
+        closeOnExcept(sumCol.sum()) { mergedSum =>
+          val mergedCount = countCol.sum()
+          Array(mergedSum, mergedCount)
+        }
+      }
+
+      override def aggregate(inputIndices: Array[Int]): Array[GroupByAggregationOnColumn] = {
+        // Merge by summing both columns
+        Array(
+          GroupByAggregation.sum().onColumn(inputIndices(0)), // sum of sums
+          GroupByAggregation.sum().onColumn(inputIndices(1)) // sum of counts
+        )
+      }
+
+      // "postStep" uses default implementation (pass-through)
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
index aaea8a36ee8..68c324e3734 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
@@ -1315,6 +1315,10 @@ trait SparkQueryCompareTestSuite extends AnyFunSuite with BeforeAndAfterAll {
     conf.set(RapidsConf.GPU_BATCH_SIZE_BYTES.key, batchSize.toString)
   }
 
+  /** Builds a DataFrame of the given schema from an empty RDD with 2 partitions. */
+  def emptyRowsDf(session: SparkSession, schema: StructType): DataFrame = {
+    session.createDataFrame(session.sparkContext.parallelize(Seq.empty[Row], 2), schema)
+  }
+
   def mixedDfWithBuckets(session: SparkSession): DataFrame = {
     import session.implicits._
     Seq[(java.lang.Integer, java.lang.Long, java.lang.Double, String, java.lang.Integer, String)](
@@ -2022,6 +2026,14 @@ trait SparkQueryCompareTestSuite extends AnyFunSuite with BeforeAndAfterAll {
     )))(_)
   }
 
+  /**
+   * Loads "group_strings_ints_ints.csv" as three nullable columns: key_str (string),
+   * c1_int (int) and c2_int (int).
+   */
+  def groupbyStringsIntsIntsFromCsv: SparkSession => DataFrame = {
+    fromCsvDf("group_strings_ints_ints.csv", StructType(Array(
+      StructField("key_str", StringType, nullable = true),
+      StructField("c1_int", IntegerType, nullable = true),
+      StructField("c2_int", IntegerType, nullable = true)
+    )))(_)
+  }
+
   def singularDoubleDf(session: SparkSession): DataFrame = {
     import session.implicits._
     Seq(1.1).toDF("double")
diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
index fc397301fbb..16567ea55fb 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
@@ -41,7 +41,7 @@ class RapidsTestSettings extends BackendTestSettings {
     .exclude("collect functions should be able to cast to array type with no null values", ADJUST_UT("order of elements in the array is non-deterministic in collect"))
     .exclude("SPARK-17641: collect functions should not collect null values", ADJUST_UT("order of elements in the array is non-deterministic in collect"))
     .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", WONT_FIX_ISSUE("Codegen related UT, not applicable for GPU"))
-    .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801"), (getJavaMajorVersion() >= 17))
+    .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801"), (getJavaMajorVersion() >= 11))
   enableSuite[RapidsJsonExpressionsSuite]
     .exclude("from_json - invalid data", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10891"))
     .exclude("from_json - input=empty array, schema=struct, output=single row with null", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10907"))
diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv
index d9e9da6221f..738e376e1b6 100644
--- a/tools/generated_files/320/operatorsScore.csv
+++ b/tools/generated_files/320/operatorsScore.csv
@@ -140,6 +140,7 @@ Greatest,4
 HiveGenericUDF,4
 HiveHash,4
 HiveSimpleUDF,4
+HiveUDAFFunction,4
 Hour,4
 HyperLogLogPlusPlus,4
 Hypot,4
@@ -227,6 +228,8 @@ Reverse,4
 Rint,4
 Round,4
 RowNumber,4
+ScalaAggregator,4
+ScalaUDAF,4
 ScalaUDF,4
 ScalarSubquery,4
 Second,4
diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv
index a278aed8a06..2e95ea0c96f 100644
--- a/tools/generated_files/320/supportedExprs.csv
+++ b/tools/generated_files/320/supportedExprs.csv
@@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic
 NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS
+ScalaUDAF,S,
,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/321/operatorsScore.csv b/tools/generated_files/321/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- a/tools/generated_files/321/operatorsScore.csv +++ b/tools/generated_files/321/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/321/supportedExprs.csv b/tools/generated_files/321/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/321/supportedExprs.csv +++ b/tools/generated_files/321/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS 
+ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/321cdh/operatorsScore.csv b/tools/generated_files/321cdh/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- a/tools/generated_files/321cdh/operatorsScore.csv +++ b/tools/generated_files/321cdh/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/321cdh/supportedExprs.csv 
b/tools/generated_files/321cdh/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/321cdh/supportedExprs.csv +++ b/tools/generated_files/321cdh/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff 
--git a/tools/generated_files/322/operatorsScore.csv b/tools/generated_files/322/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- a/tools/generated_files/322/operatorsScore.csv +++ b/tools/generated_files/322/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/322/supportedExprs.csv b/tools/generated_files/322/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/322/supportedExprs.csv +++ b/tools/generated_files/322/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, 
,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/323/operatorsScore.csv b/tools/generated_files/323/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- a/tools/generated_files/323/operatorsScore.csv +++ b/tools/generated_files/323/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/323/supportedExprs.csv b/tools/generated_files/323/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/323/supportedExprs.csv +++ b/tools/generated_files/323/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, 
,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/324/operatorsScore.csv b/tools/generated_files/324/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- a/tools/generated_files/324/operatorsScore.csv +++ b/tools/generated_files/324/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/324/supportedExprs.csv b/tools/generated_files/324/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/324/supportedExprs.csv +++ b/tools/generated_files/324/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The 
supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv index d408b9e042d..4b116751d42 100644 --- a/tools/generated_files/330/operatorsScore.csv +++ b/tools/generated_files/330/operatorsScore.csv 
@@ -145,6 +145,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -237,6 +238,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv index c607288973e..26ed32b65a8 100644 --- a/tools/generated_files/330/supportedExprs.csv +++ b/tools/generated_files/330/supportedExprs.csv @@ -834,7 +834,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, 
,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/330cdh/operatorsScore.csv b/tools/generated_files/330cdh/operatorsScore.csv index d408b9e042d..4b116751d42 100644 --- a/tools/generated_files/330cdh/operatorsScore.csv +++ b/tools/generated_files/330cdh/operatorsScore.csv @@ -145,6 +145,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -237,6 +238,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/330cdh/supportedExprs.csv b/tools/generated_files/330cdh/supportedExprs.csv index c607288973e..26ed32b65a8 100644 --- a/tools/generated_files/330cdh/supportedExprs.csv +++ b/tools/generated_files/330cdh/supportedExprs.csv @@ -834,7 +834,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, 
,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/331/operatorsScore.csv b/tools/generated_files/331/operatorsScore.csv index 99bfa557c4c..e014362cd1a 100644 --- a/tools/generated_files/331/operatorsScore.csv +++ b/tools/generated_files/331/operatorsScore.csv @@ -146,6 +146,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -238,6 +239,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/331/supportedExprs.csv b/tools/generated_files/331/supportedExprs.csv index 5077b8a0500..a286681f0a0 100644 --- a/tools/generated_files/331/supportedExprs.csv +++ b/tools/generated_files/331/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/332/operatorsScore.csv b/tools/generated_files/332/operatorsScore.csv index 99bfa557c4c..e014362cd1a 100644 --- a/tools/generated_files/332/operatorsScore.csv +++ b/tools/generated_files/332/operatorsScore.csv @@ -146,6 +146,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -238,6 +239,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 
Second,4 diff --git a/tools/generated_files/332/supportedExprs.csv b/tools/generated_files/332/supportedExprs.csv index 5077b8a0500..a286681f0a0 100644 --- a/tools/generated_files/332/supportedExprs.csv +++ b/tools/generated_files/332/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, 
,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/332cdh/operatorsScore.csv b/tools/generated_files/332cdh/operatorsScore.csv index 99bfa557c4c..e014362cd1a 100644 --- a/tools/generated_files/332cdh/operatorsScore.csv +++ b/tools/generated_files/332cdh/operatorsScore.csv @@ -146,6 +146,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -238,6 +239,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/332cdh/supportedExprs.csv b/tools/generated_files/332cdh/supportedExprs.csv index 5077b8a0500..a286681f0a0 100644 --- a/tools/generated_files/332cdh/supportedExprs.csv +++ b/tools/generated_files/332cdh/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, 
,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/333/operatorsScore.csv b/tools/generated_files/333/operatorsScore.csv index 99bfa557c4c..e014362cd1a 100644 --- a/tools/generated_files/333/operatorsScore.csv +++ b/tools/generated_files/333/operatorsScore.csv @@ -146,6 +146,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -238,6 +239,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/333/supportedExprs.csv b/tools/generated_files/333/supportedExprs.csv index 5077b8a0500..a286681f0a0 100644 --- a/tools/generated_files/333/supportedExprs.csv +++ b/tools/generated_files/333/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, 
,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/334/operatorsScore.csv b/tools/generated_files/334/operatorsScore.csv index 99bfa557c4c..e014362cd1a 100644 --- a/tools/generated_files/334/operatorsScore.csv +++ b/tools/generated_files/334/operatorsScore.csv @@ -146,6 +146,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -238,6 +239,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/334/supportedExprs.csv b/tools/generated_files/334/supportedExprs.csv index 5077b8a0500..a286681f0a0 100644 --- a/tools/generated_files/334/supportedExprs.csv 
+++ b/tools/generated_files/334/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/340/operatorsScore.csv b/tools/generated_files/340/operatorsScore.csv index c8ad2436f86..8d57d996800 100644 --- 
a/tools/generated_files/340/operatorsScore.csv +++ b/tools/generated_files/340/operatorsScore.csv @@ -147,6 +147,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -239,6 +240,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/340/supportedExprs.csv b/tools/generated_files/340/supportedExprs.csv index 3f650da22a3..a1a6d420c46 100644 --- a/tools/generated_files/340/supportedExprs.csv +++ b/tools/generated_files/340/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, 
,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/341/operatorsScore.csv b/tools/generated_files/341/operatorsScore.csv index c8ad2436f86..8d57d996800 100644 --- a/tools/generated_files/341/operatorsScore.csv +++ b/tools/generated_files/341/operatorsScore.csv @@ -147,6 +147,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -239,6 +240,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/341/supportedExprs.csv b/tools/generated_files/341/supportedExprs.csv index 3f650da22a3..a1a6d420c46 100644 --- a/tools/generated_files/341/supportedExprs.csv +++ b/tools/generated_files/341/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, 
,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/342/operatorsScore.csv b/tools/generated_files/342/operatorsScore.csv index c8ad2436f86..8d57d996800 100644 --- a/tools/generated_files/342/operatorsScore.csv +++ b/tools/generated_files/342/operatorsScore.csv @@ -147,6 +147,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -239,6 +240,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/342/supportedExprs.csv b/tools/generated_files/342/supportedExprs.csv index 3f650da22a3..a1a6d420c46 100644 --- a/tools/generated_files/342/supportedExprs.csv +++ b/tools/generated_files/342/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, 
,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/343/operatorsScore.csv b/tools/generated_files/343/operatorsScore.csv index c8ad2436f86..8d57d996800 100644 --- a/tools/generated_files/343/operatorsScore.csv +++ b/tools/generated_files/343/operatorsScore.csv @@ -147,6 +147,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -239,6 +240,8 @@ Round,4 RoundCeil,4 
RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/343/supportedExprs.csv b/tools/generated_files/343/supportedExprs.csv index 3f650da22a3..a1a6d420c46 100644 --- a/tools/generated_files/343/supportedExprs.csv +++ b/tools/generated_files/343/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, 
,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/344/operatorsScore.csv b/tools/generated_files/344/operatorsScore.csv index c8ad2436f86..8d57d996800 100644 --- a/tools/generated_files/344/operatorsScore.csv +++ b/tools/generated_files/344/operatorsScore.csv @@ -147,6 +147,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -239,6 +240,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/344/supportedExprs.csv b/tools/generated_files/344/supportedExprs.csv index 3f650da22a3..a1a6d420c46 100644 --- a/tools/generated_files/344/supportedExprs.csv +++ b/tools/generated_files/344/supportedExprs.csv @@ -836,7 +836,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS 
HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/350/operatorsScore.csv b/tools/generated_files/350/operatorsScore.csv index 031830ad733..2de3aac1d29 100644 --- a/tools/generated_files/350/operatorsScore.csv +++ b/tools/generated_files/350/operatorsScore.csv @@ -151,6 +151,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -244,6 +245,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/350/supportedExprs.csv b/tools/generated_files/350/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/350/supportedExprs.csv +++ b/tools/generated_files/350/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, 
,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/351/operatorsScore.csv b/tools/generated_files/351/operatorsScore.csv index 031830ad733..2de3aac1d29 100644 --- a/tools/generated_files/351/operatorsScore.csv +++ b/tools/generated_files/351/operatorsScore.csv @@ -151,6 +151,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -244,6 +245,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/351/supportedExprs.csv b/tools/generated_files/351/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/351/supportedExprs.csv 
+++ b/tools/generated_files/351/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/352/operatorsScore.csv b/tools/generated_files/352/operatorsScore.csv index fe689f780a9..36e50978ad8 100644 --- 
a/tools/generated_files/352/operatorsScore.csv +++ b/tools/generated_files/352/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -245,6 +246,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/352/supportedExprs.csv b/tools/generated_files/352/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/352/supportedExprs.csv +++ b/tools/generated_files/352/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, 
,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/353/operatorsScore.csv b/tools/generated_files/353/operatorsScore.csv index fe689f780a9..36e50978ad8 100644 --- a/tools/generated_files/353/operatorsScore.csv +++ b/tools/generated_files/353/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -245,6 +246,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/353/supportedExprs.csv b/tools/generated_files/353/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/353/supportedExprs.csv +++ b/tools/generated_files/353/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, 
,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/354/operatorsScore.csv b/tools/generated_files/354/operatorsScore.csv index fe689f780a9..36e50978ad8 100644 --- a/tools/generated_files/354/operatorsScore.csv +++ b/tools/generated_files/354/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -245,6 +246,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/354/supportedExprs.csv b/tools/generated_files/354/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/354/supportedExprs.csv +++ b/tools/generated_files/354/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, 
,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/355/operatorsScore.csv b/tools/generated_files/355/operatorsScore.csv index fe689f780a9..36e50978ad8 100644 --- a/tools/generated_files/355/operatorsScore.csv +++ b/tools/generated_files/355/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -245,6 +246,8 @@ Round,4 RoundCeil,4 
RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/355/supportedExprs.csv b/tools/generated_files/355/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/355/supportedExprs.csv +++ b/tools/generated_files/355/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, 
,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/356/operatorsScore.csv b/tools/generated_files/356/operatorsScore.csv index fe689f780a9..36e50978ad8 100644 --- a/tools/generated_files/356/operatorsScore.csv +++ b/tools/generated_files/356/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -245,6 +246,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/356/supportedExprs.csv b/tools/generated_files/356/supportedExprs.csv index 99fe3750667..8d9c8ea8d49 100644 --- a/tools/generated_files/356/supportedExprs.csv +++ b/tools/generated_files/356/supportedExprs.csv @@ -844,7 +844,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS 
HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/400/operatorsScore.csv b/tools/generated_files/400/operatorsScore.csv index fd239d29725..2d6ed65f4c8 100644 --- a/tools/generated_files/400/operatorsScore.csv +++ b/tools/generated_files/400/operatorsScore.csv @@ -152,6 +152,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -246,6 +247,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/400/supportedExprs.csv b/tools/generated_files/400/supportedExprs.csv index 7861c579d8b..c3bc508ec68 100644 --- a/tools/generated_files/400/supportedExprs.csv +++ b/tools/generated_files/400/supportedExprs.csv @@ -846,7 +846,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, 
,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/401/operatorsScore.csv b/tools/generated_files/401/operatorsScore.csv index bbaa15c9e66..d7a45e08280 100644 --- a/tools/generated_files/401/operatorsScore.csv +++ b/tools/generated_files/401/operatorsScore.csv @@ -154,6 +154,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -248,6 +249,8 @@ Round,4 RoundCeil,4 RoundFloor,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/401/supportedExprs.csv b/tools/generated_files/401/supportedExprs.csv index c22306819e3..7a24562a639 100644 --- a/tools/generated_files/401/supportedExprs.csv 
+++ b/tools/generated_files/401/supportedExprs.csv @@ -850,7 +850,19 @@ NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/operatorsScore.csv b/tools/generated_files/operatorsScore.csv index d9e9da6221f..738e376e1b6 100644 --- 
a/tools/generated_files/operatorsScore.csv +++ b/tools/generated_files/operatorsScore.csv @@ -140,6 +140,7 @@ Greatest,4 HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 +HiveUDAFFunction,4 Hour,4 HyperLogLogPlusPlus,4 Hypot,4 @@ -227,6 +228,8 @@ Reverse,4 Rint,4 Round,4 RowNumber,4 +ScalaAggregator,4 +ScalaUDAF,4 ScalaUDF,4 ScalarSubquery,4 Second,4 diff --git a/tools/generated_files/supportedExprs.csv b/tools/generated_files/supportedExprs.csv index a278aed8a06..2e95ea0c96f 100644 --- a/tools/generated_files/supportedExprs.csv +++ b/tools/generated_files/supportedExprs.csv @@ -807,7 +807,19 @@ StaticInvoke,S, ,The supported types are not deterministic since it's a dynamic NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaAggregator,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS HiveSimpleUDF,S, 
,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveUDAFFunction,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS