Skip to content

Commit e6771f8

Browse files
authored
[feature](function) support nth_value window function (#50559)
### What problem does this PR solve? Problem Summary: doc: apache/doris-website#2350 support nth_value window function nth_value(column, offset), offset is positive number
1 parent 839c277 commit e6771f8

File tree

8 files changed

+307
-1
lines changed

8 files changed

+307
-1
lines changed

be/src/vec/aggregate_functions/aggregate_function_window.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ AggregateFunctionPtr create_function_lead_lag_first_last(const String& name,
4141
// FE have rewrite case first_value(k1,false)--->first_value(k1)
4242
// so size is 2, must will be arg_ignore_null_value
4343
if (argument_types.size() == 2) {
44-
DCHECK(name == "first_value" || name == "last_value") << "invalid function name: " << name;
44+
DCHECK(name == "first_value" || name == "last_value" || name == "nth_value")
45+
<< "invalid function name: " << name;
4546
arg_ignore_null_value = true;
4647
}
4748
switch (argument_types[0]->get_primitive_type()) {
@@ -374,6 +375,8 @@ CREATE_WINDOW_FUNCTION_WITH_NAME_AND_DATA(create_aggregate_function_window_first
374375
WindowFunctionFirstImpl);
375376
CREATE_WINDOW_FUNCTION_WITH_NAME_AND_DATA(create_aggregate_function_window_last, FirstLastData,
376377
WindowFunctionLastImpl);
378+
CREATE_WINDOW_FUNCTION_WITH_NAME_AND_DATA(create_aggregate_function_window_nth_value, FirstLastData,
379+
WindowFunctionNthValueImpl);
377380

378381
void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& factory) {
379382
factory.register_function("dense_rank", creator_without_type::creator<WindowFunctionDenseRank>);
@@ -391,6 +394,7 @@ void register_aggregate_function_window_lead_lag_first_last(
391394
factory.register_function_both("lag", create_aggregate_function_window_lag);
392395
factory.register_function_both("first_value", create_aggregate_function_window_first);
393396
factory.register_function_both("last_value", create_aggregate_function_window_last);
397+
factory.register_function_both("nth_value", create_aggregate_function_window_nth_value);
394398
}
395399

396400
} // namespace doris::vectorized

be/src/vec/aggregate_functions/aggregate_function_window.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,26 @@ struct WindowFunctionLastImpl : Data {
592592
static const char* name() { return "last_value"; }
593593
};
594594

595+
template <typename Data, bool = false>
596+
struct WindowFunctionNthValueImpl : Data {
597+
void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
598+
int64_t frame_end, const IColumn** columns) {
599+
DCHECK_LE(frame_start, frame_end);
600+
frame_start = std::max<int64_t>(frame_start, partition_start);
601+
frame_end = std::min<int64_t>(frame_end, partition_end);
602+
int64_t offset = assert_cast<const ColumnInt64&, TypeCheckOnRelease::DISABLE>(*columns[1])
603+
.get_data()[0] -
604+
1;
605+
if (frame_end - frame_start <= offset) {
606+
this->set_is_null();
607+
return;
608+
}
609+
this->set_value(columns, offset + frame_start);
610+
}
611+
612+
static const char* name() { return "nth_value"; }
613+
};
614+
595615
template <typename Data>
596616
class WindowFunctionData final
597617
: public IAggregateFunctionDataHelper<Data, WindowFunctionData<Data>> {

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.nereids.trees.expressions.functions.window.Lag;
2424
import org.apache.doris.nereids.trees.expressions.functions.window.LastValue;
2525
import org.apache.doris.nereids.trees.expressions.functions.window.Lead;
26+
import org.apache.doris.nereids.trees.expressions.functions.window.NthValue;
2627
import org.apache.doris.nereids.trees.expressions.functions.window.Ntile;
2728
import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank;
2829
import org.apache.doris.nereids.trees.expressions.functions.window.Rank;
@@ -45,6 +46,7 @@ public class BuiltinWindowFunctions implements FunctionHelper {
4546
window(LastValue.class, "last_value"),
4647
window(Lead.class, "lead"),
4748
window(Ntile.class, "ntile"),
49+
window(NthValue.class, "nth_value"),
4850
window(PercentRank.class, "percent_rank"),
4951
window(Rank.class, "rank"),
5052
window(RowNumber.class, "row_number"),

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.apache.doris.nereids.trees.expressions.functions.window.Lag;
3434
import org.apache.doris.nereids.trees.expressions.functions.window.LastValue;
3535
import org.apache.doris.nereids.trees.expressions.functions.window.Lead;
36+
import org.apache.doris.nereids.trees.expressions.functions.window.NthValue;
3637
import org.apache.doris.nereids.trees.expressions.functions.window.Ntile;
3738
import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank;
3839
import org.apache.doris.nereids.trees.expressions.functions.window.Rank;
@@ -433,6 +434,12 @@ public Ntile visitNtile(Ntile ntile, Void ctx) {
433434
return ntile;
434435
}
435436

437+
@Override
438+
public NthValue visitNthValue(NthValue nthValue, Void ctx) {
439+
NthValue.checkSecondParameter(nthValue);
440+
return nthValue;
441+
}
442+
436443
/**
437444
* check if the current WindowFrame equals with the required WindowFrame; if current WindowFrame is null,
438445
* the requiredFrame should be used as default frame.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.functions.window;
19+
20+
import org.apache.doris.catalog.FunctionSignature;
21+
import org.apache.doris.nereids.exceptions.AnalysisException;
22+
import org.apache.doris.nereids.trees.expressions.Expression;
23+
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
24+
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
25+
import org.apache.doris.nereids.trees.expressions.literal.Literal;
26+
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
27+
import org.apache.doris.nereids.types.BigIntType;
28+
import org.apache.doris.nereids.types.DataType;
29+
import org.apache.doris.nereids.types.coercion.AnyDataType;
30+
31+
import com.google.common.base.Preconditions;
32+
import com.google.common.collect.ImmutableList;
33+
34+
import java.util.List;
35+
36+
/**
37+
* class for nth_value(column, offset)
38+
*/
39+
public class NthValue extends WindowFunction
40+
implements AlwaysNullable, ExplicitlyCastableSignature {
41+
42+
private static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
43+
FunctionSignature.retArgType(0).args(AnyDataType.INSTANCE_WITHOUT_INDEX, BigIntType.INSTANCE)
44+
);
45+
46+
public NthValue(Expression child, Expression offset) {
47+
super("nth_value", child, offset);
48+
}
49+
50+
public NthValue(List<Expression> children) {
51+
super("nth_value", children);
52+
}
53+
54+
@Override
55+
public List<FunctionSignature> getSignatures() {
56+
return SIGNATURES;
57+
}
58+
59+
@Override
60+
public NthValue withChildren(List<Expression> children) {
61+
Preconditions.checkArgument(children.size() == 2);
62+
return new NthValue(children);
63+
}
64+
65+
@Override
66+
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
67+
return visitor.visitNthValue(this, context);
68+
}
69+
70+
@Override
71+
public DataType getDataType() {
72+
return child(0).getDataType();
73+
}
74+
75+
/**
76+
* Check the second parameter of NthValue function.
77+
* The second parameter must be a constant positive integer.
78+
*/
79+
public static void checkSecondParameter(NthValue nthValue) {
80+
Preconditions.checkArgument(nthValue.arity() == 2);
81+
Expression offset = nthValue.child(1);
82+
if (offset instanceof Literal) {
83+
if (((Literal) offset).getDouble() <= 0) {
84+
throw new AnalysisException(
85+
"The offset parameter of NthValue must be a constant positive integer: " + offset);
86+
}
87+
} else {
88+
throw new AnalysisException(
89+
"The offset parameter of NthValue must be a constant positive integer: " + offset);
90+
}
91+
}
92+
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.nereids.trees.expressions.functions.window.Lag;
2424
import org.apache.doris.nereids.trees.expressions.functions.window.LastValue;
2525
import org.apache.doris.nereids.trees.expressions.functions.window.Lead;
26+
import org.apache.doris.nereids.trees.expressions.functions.window.NthValue;
2627
import org.apache.doris.nereids.trees.expressions.functions.window.Ntile;
2728
import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank;
2829
import org.apache.doris.nereids.trees.expressions.functions.window.Rank;
@@ -58,6 +59,10 @@ default R visitNtile(Ntile ntile, C context) {
5859
return visitWindowFunction(ntile, context);
5960
}
6061

62+
default R visitNthValue(NthValue nthValue, C context) {
63+
return visitWindowFunction(nthValue, context);
64+
}
65+
6166
default R visitPercentRank(PercentRank percentRank, C context) {
6267
return visitWindowFunction(percentRank, context);
6368
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !select --
3+
16
4+
5+
-- !select_1 --
6+
\N \N \N \N
7+
1 1989 1001 \N
8+
2 1986 1001 \N
9+
3 1989 1002 \N
10+
4 1991 3021 \N
11+
5 1985 5014 \N
12+
6 32767 3021 \N
13+
7 -32767 1002 \N
14+
8 255 2147483647 \N
15+
9 1991 -2147483647 \N
16+
10 1991 5014 \N
17+
11 1989 25699 \N
18+
12 32767 -2147483647 \N
19+
13 -32767 2147483647 \N
20+
14 255 103 \N
21+
15 1992 3021 \N
22+
23+
-- !select_2 --
24+
\N \N \N \N
25+
1 1989 1001 \N
26+
2 1986 1001 \N
27+
3 1989 1002 \N
28+
4 1991 3021 \N
29+
5 1985 5014 \N
30+
6 32767 3021 \N
31+
7 -32767 1002 \N
32+
8 255 2147483647 \N
33+
9 1991 -2147483647 \N
34+
10 1991 5014 \N
35+
11 1989 25699 \N
36+
12 32767 -2147483647 \N
37+
13 -32767 2147483647 \N
38+
14 255 103 \N
39+
15 1992 3021 \N
40+
41+
-- !select_3 --
42+
\N \N \N 13
43+
-32767 7 1002 13
44+
-32767 13 2147483647 13
45+
255 8 2147483647 8
46+
255 14 103 14
47+
1985 5 5014 5
48+
1986 2 1001 2
49+
1989 1 1001 1
50+
1989 3 1002 3
51+
1989 11 25699 11
52+
1991 4 3021 4
53+
1991 9 -2147483647 9
54+
1991 10 5014 10
55+
1992 15 3021 15
56+
32767 6 3021 6
57+
32767 12 -2147483647 12
58+
59+
-- !select_4 --
60+
\N \N \N \N
61+
-2147483647 1991 9 \N
62+
-2147483647 32767 12 \N
63+
103 255 14 \N
64+
1001 1986 2 \N
65+
1001 1989 1 \N
66+
1002 -32767 7 \N
67+
1002 1989 3 \N
68+
3021 1991 4 \N
69+
3021 1992 15 \N
70+
3021 32767 6 \N
71+
5014 1985 5 \N
72+
5014 1991 10 \N
73+
25699 1989 11 \N
74+
2147483647 -32767 13 \N
75+
2147483647 255 8 \N
76+
77+
-- !select_6 --
78+
\N \N \N \N
79+
1002 -32767 7 \N
80+
2147483647 -32767 13 \N
81+
103 255 14 \N
82+
1001 1986 2 \N
83+
1002 1989 3 \N
84+
3021 1991 4 \N
85+
5014 1991 10 \N
86+
-2147483647 32767 12 14
87+
2147483647 255 8 \N
88+
5014 1985 5 \N
89+
1001 1989 1 \N
90+
25699 1989 11 \N
91+
-2147483647 1991 9 \N
92+
3021 1992 15 \N
93+
3021 32767 6 \N
94+
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
suite("test_nthvalue_function") {
18+
19+
def dbName = "test_nthvalue_function_db"
20+
sql "DROP DATABASE IF EXISTS ${dbName}"
21+
sql "CREATE DATABASE ${dbName}"
22+
sql "USE $dbName"
23+
24+
sql "DROP TABLE IF EXISTS test_nthvalue_function"
25+
sql """
26+
CREATE TABLE IF NOT EXISTS `test_nthvalue_function` (
27+
`k0` boolean null comment "",
28+
`k1` tinyint(4) null comment "",
29+
`k2` smallint(6) null comment "",
30+
`k3` int(11) null comment "",
31+
`k4` bigint(20) null comment "",
32+
`k5` decimal(10, 6) null comment "",
33+
`k6` char(5) null comment "",
34+
`k10` date null comment "",
35+
`k11` datetime null comment "",
36+
`k7` varchar(20) null comment "",
37+
`k8` double max null comment "",
38+
`k9` float sum null comment "",
39+
`k12` string replace null comment "",
40+
`k13` largeint(40) replace null comment ""
41+
) engine=olap
42+
DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1")
43+
"""
44+
45+
streamLoad {
46+
table "test_nthvalue_function"
47+
db dbName
48+
set 'column_separator', ','
49+
file "../../baseall.txt"
50+
}
51+
sql "sync"
52+
53+
qt_select "select count() from test_nthvalue_function;"
54+
55+
test {
56+
sql "select k1, k2, k3, nth_value(k1,0) over (partition by k1 order by k2) as ntile from test_nthvalue_function order by k1, k2, k3 desc;"
57+
exception "positive"
58+
}
59+
60+
test {
61+
sql "select k1, k2, k3, nth_value(k1,-1) over (partition by k1 order by k2) as ntile from test_nthvalue_function order by k1, k2, k3 desc;"
62+
exception "positive"
63+
}
64+
65+
test {
66+
sql "select k1, k2, k3, nth_value(k1,NULL) over (partition by k1 order by k2) as ntile from test_nthvalue_function order by k1, k2, k3 desc;"
67+
exception "positive"
68+
}
69+
70+
qt_select_1 "select k1, k2, k3, nth_value(k1,3) over (partition by k1 order by k2) from test_nthvalue_function order by k1, k2, k3 desc;"
71+
qt_select_2 "select k1, k2, k3, nth_value(k1,5) over (partition by k1 order by k2) from test_nthvalue_function order by k1, k2, k3 desc;"
72+
qt_select_3 "select k2, k1, k3, nth_value(k1,3) over (order by k2 rows BETWEEN 2 PRECEDING AND 2 following) from test_nthvalue_function order by k2,k1;"
73+
qt_select_4 "select k3, k2, k1, nth_value(k1,3) over (partition by k3 order by k2) from test_nthvalue_function order by k3, k2, k1;"
74+
qt_select_6 "select k3, k2, k1, nth_value(k1,3) over (partition by k6 order by k2 rows between 10 preceding and 5 preceding) as res from test_nthvalue_function order by k6, k2, k1,res;"
75+
76+
77+
}
78+
79+
80+
81+
82+

0 commit comments

Comments
 (0)