Skip to content

Commit 8906e59

Browse files
committed
[Enhancement] Support the array join syntax of Clickhouse
1 parent 10589d5 commit 8906e59

File tree

10 files changed

+239
-7
lines changed

10 files changed

+239
-7
lines changed

be/src/exprs/table_function/multi_unnest.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ class MultiUnnest final : public TableFunction {
5252
copy_count_column->append(offset);
5353
for (int row_idx = 0; row_idx < row_count; ++row_idx) {
5454
uint32_t max_length_array_size = 0;
55+
uint32_t min_length_array_size = UINT32_MAX;
5556
for (auto& col_idx : state->get_columns()) {
5657
Column* column = col_idx->as_mutable_raw_ptr();
5758
if (column->is_null(row_idx)) {
5859
// current row is null, ignore the offset.
60+
min_length_array_size = 0;
5961
continue;
6062
}
6163
auto* col_array = down_cast<ArrayColumn*>(ColumnHelper::get_data_column(column));
@@ -66,7 +68,14 @@ class MultiUnnest final : public TableFunction {
6668
if (array_element_length > max_length_array_size) {
6769
max_length_array_size = array_element_length;
6870
}
71+
if (array_element_length < min_length_array_size) {
72+
min_length_array_size = array_element_length;
73+
}
6974
}
75+
if (max_length_array_size != min_length_array_size && state->get_is_array_join_mode()) {
76+
state->set_status(Status::InternalError("Sizes of ARRAY-JOIN-ed arrays do not match."));
77+
return {};
78+
}
7079
if (max_length_array_size == 0 && state->get_is_left_join()) {
7180
offset += 1;
7281
copy_count_column->append(offset);
@@ -122,6 +131,9 @@ class MultiUnnest final : public TableFunction {
122131
if (table_fn.__isset.is_left_join) {
123132
(*state)->set_is_left_join(table_fn.is_left_join);
124133
}
134+
if (table_fn.__isset.is_array_join) {
135+
(*state)->set_is_array_join_mode(table_fn.is_array_join);
136+
}
125137
return Status::OK();
126138
}
127139

be/src/exprs/table_function/table_function.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class TableFunctionState {
4343

4444
bool get_is_left_join() { return _is_left_join; }
4545

46+
void set_is_array_join_mode(bool _is_array_join_mode) { this->_is_array_join_mode = _is_array_join_mode; }
47+
48+
bool get_is_array_join_mode() { return _is_array_join_mode; }
49+
4650
// How many rows of `get_columns()` have been processed/consumed by the table function.
4751
//
4852
// If `processed_rows()` < `input_rows()`, the table function will be invoked again with the same parameter columns.
@@ -83,6 +87,7 @@ class TableFunctionState {
8387

8488
// used to identify left join for table function
8589
bool _is_left_join = false;
90+
bool _is_array_join_mode = false;
8691
bool _is_required = true;
8792
};
8893

fe/fe-core/src/main/java/com/starrocks/catalog/TableFunction.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public class TableFunction extends Function {
6262
// not serialized
6363
private boolean isLeftJoin = false;
6464

65+
private boolean isArrayJoin = false;
66+
6567
protected TableFunction() {
6668
}
6769

@@ -171,6 +173,10 @@ public boolean isLeftJoin() {
171173
return this.isLeftJoin;
172174
}
173175

176+
public void setIsArrayJoin(boolean isArrayJoin) {
177+
this.isArrayJoin = isArrayJoin;
178+
}
179+
174180

175181

176182
@Override
@@ -180,6 +186,7 @@ public TFunction toThrift() {
180186
tableFn.setSymbol(symbolName);
181187
tableFn.setRet_types(tableFnReturnTypes.stream().map(TypeSerializer::toThrift).collect(Collectors.toList()));
182188
tableFn.setIs_left_join(isLeftJoin);
189+
tableFn.setIs_array_join(isArrayJoin);
183190
fn.setTable_fn(tableFn);
184191
return fn;
185192
}

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/QueryAnalyzer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,6 +1611,7 @@ public Scope visitTableFunction(TableFunctionRelation node, Scope scope) {
16111611

16121612
TableFunction tableFunction = (TableFunction) fn;
16131613
tableFunction.setIsLeftJoin(node.getIsLeftJoin());
1614+
tableFunction.setIsArrayJoin(node.getIsArrayJoin());
16141615
node.setTableFunction(tableFunction);
16151616
node.setChildExpressions(childExpressions);
16161617

fe/fe-core/src/main/java/com/starrocks/sql/ast/TableFunctionRelation.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ public class TableFunctionRelation extends Relation {
4343

4444
private boolean isLeftJoin = false;
4545

46+
private boolean isArrayJoin = false;
47+
4648
public TableFunctionRelation(FunctionCallExpr functionCallExpr) {
4749
this(functionCallExpr.getFnRef().getFnName().toString().toLowerCase(),
4850
functionCallExpr.getParams(), functionCallExpr.getPos());
@@ -78,6 +80,14 @@ public boolean getIsLeftJoin() {
7880
return isLeftJoin;
7981
}
8082

83+
public void setIsArrayJoin(boolean isArrayJoin) {
84+
this.isArrayJoin = isArrayJoin;
85+
}
86+
87+
public boolean getIsArrayJoin() {
88+
return isArrayJoin;
89+
}
90+
8191
public List<Expr> getChildExpressions() {
8292
return childExpressions;
8393
}

fe/fe-core/src/main/java/com/starrocks/sql/parser/AstBuilder.java

Lines changed: 90 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5793,13 +5793,21 @@ This can share the same logic as select sum(1) from table
57935793
SelectList selectList = new SelectList(selectItems, isDistinct);
57945794
selectList.setHintNodes(hintMap.get(context));
57955795

5796-
SelectRelation resultSelectRelation = new SelectRelation(
5797-
selectList,
5798-
from,
5799-
(Expr) visitIfPresent(context.where),
5800-
(GroupByClause) visitIfPresent(context.groupingElement()),
5801-
(Expr) visitIfPresent(context.having),
5802-
createPos(context));
5796+
SelectRelation resultSelectRelation = null;
5797+
5798+
//Convert ARRAY JOIN to UNNSET + LATERAL JOIN
5799+
if (context.arrayJoinClause() != null) {
5800+
resultSelectRelation = transFormArrayJoinToUnnest(selectList, from, context);
5801+
} else {
5802+
resultSelectRelation = new SelectRelation(
5803+
selectList,
5804+
from,
5805+
(Expr) visitIfPresent(context.where),
5806+
(GroupByClause) visitIfPresent(context.groupingElement()),
5807+
(Expr) visitIfPresent(context.having),
5808+
createPos(context));
5809+
}
5810+
58035811

58045812
// extend Query with QUALIFY to nested queries with filter.
58055813
if (context.qualifyFunction != null) {
@@ -9419,6 +9427,81 @@ private QualifiedName normalizeName(QualifiedName qualifiedName) {
94199427
return QualifiedName.of(parts, qualifiedName.getPos());
94209428
}
94219429

9430+
private SelectRelation transFormArrayJoinToUnnest(SelectList selectList,
9431+
Relation fromRelation,
9432+
com.starrocks.sql.parser.StarRocksParser.QuerySpecificationContext context) {
9433+
9434+
com.starrocks.sql.parser.StarRocksParser.ArrayJoinClauseContext arrayJoinExprContext = context.arrayJoinClause();
9435+
9436+
JoinOperator joinType = JoinOperator.INNER_JOIN;
9437+
if (arrayJoinExprContext.LEFT() != null) {
9438+
joinType = JoinOperator.LEFT_OUTER_JOIN;
9439+
}
9440+
9441+
fromRelation = buildUnnestJoin(fromRelation, arrayJoinExprContext.arrayJoinList(), joinType);
9442+
9443+
return new SelectRelation(
9444+
selectList,
9445+
fromRelation,
9446+
(Expr) visitIfPresent(context.where),
9447+
(GroupByClause) visitIfPresent(context.groupingElement()),
9448+
(Expr) visitIfPresent(context.having),
9449+
createPos(context)
9450+
);
9451+
}
9452+
9453+
private Relation buildUnnestJoin(Relation leftRelation,
9454+
com.starrocks.sql.parser.StarRocksParser.ArrayJoinListContext exprListContext,
9455+
JoinOperator joinType) {
9456+
9457+
List<Expr> arrayExprs = Lists.newArrayList();
9458+
List<String> aliasList = Lists.newArrayList();
9459+
for (com.starrocks.sql.parser.StarRocksParser.ArrayJoinExprContext exprContext : exprListContext.arrayJoinExpr()) {
9460+
Expr arrayExpr = (Expr) visit(exprContext.expression());
9461+
String alias = null;
9462+
if (exprContext.identifier() != null) {
9463+
alias = ((Identifier) visit(exprContext.identifier())).getValue();
9464+
} else {
9465+
//If no alias is provided, use the expression string as the column name
9466+
alias = exprContext.expression().getText();
9467+
}
9468+
aliasList.add(alias);
9469+
arrayExprs.add(arrayExpr);
9470+
}
9471+
9472+
FunctionCallExpr unnestFunction = createUnnestFunction(arrayExprs);
9473+
9474+
String tableAlias = generateTableAlias(aliasList.get(0));
9475+
9476+
TableFunctionRelation unnestRelation = new TableFunctionRelation(unnestFunction);
9477+
unnestRelation.setChildExpressions(arrayExprs);
9478+
unnestRelation.setAlias(new TableName(null, tableAlias));
9479+
unnestRelation.setColumnOutputNames(aliasList);
9480+
unnestRelation.setIsArrayJoin(true);
9481+
BoolLiteral boolLiteral = null;
9482+
if (joinType.isLeftOuterJoin()) {
9483+
unnestRelation.setIsLeftJoin(true);
9484+
boolLiteral = new BoolLiteral(true);
9485+
}
9486+
9487+
return new JoinRelation(
9488+
joinType,
9489+
leftRelation,
9490+
unnestRelation,
9491+
boolLiteral,
9492+
true);
9493+
}
9494+
9495+
private FunctionCallExpr createUnnestFunction(List<Expr> params) {
9496+
FunctionCallExpr unnestFunc = new FunctionCallExpr("unnest", params);
9497+
return unnestFunc;
9498+
}
9499+
9500+
private String generateTableAlias(String columnAlias) {
9501+
// Generate table aliases to avoid conflicts
9502+
return "t_" + columnAlias;
9503+
}
9504+
94229505
public static IndexDef.IndexType getIndexType(com.starrocks.sql.parser.StarRocksParser.IndexTypeContext indexTypeContext) {
94239506
IndexDef.IndexType index;
94249507
if (indexTypeContext == null || indexTypeContext.BITMAP() != null) {

fe/fe-grammar/src/main/antlr/com/starrocks/grammar/StarRocks.g4

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2370,6 +2370,7 @@ limitElement
23702370
querySpecification
23712371
: SELECT setQuantifier? selectItem (',' selectItem)*
23722372
fromClause
2373+
arrayJoinClause?
23732374
((WHERE where=expression)? (GROUP BY groupingElement)? (HAVING having=expression)?
23742375
(QUALIFY qualifyFunction=selectItem comparisonOperator limit=INTEGER_VALUE)?)
23752376
;
@@ -2379,6 +2380,18 @@ fromClause
23792380
| FROM DUAL #dual
23802381
;
23812382

2383+
arrayJoinClause
2384+
: (LEFT? ARRAY) JOIN arrayJoinList
2385+
;
2386+
2387+
arrayJoinList
2388+
: arrayJoinExpr (',' arrayJoinExpr)*
2389+
;
2390+
2391+
arrayJoinExpr
2392+
:expression (AS? identifier)?
2393+
;
2394+
23822395
groupingElement
23832396
: ROLLUP '(' (expressionList)? ')' #rollup
23842397
| CUBE '(' (expressionList)? ')' #cube

gensrc/thrift/Types.thrift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ struct TTableFunction {
336336
2: optional string symbol
337337
// Table function left join
338338
3: optional bool is_left_join
339+
4: optional bool is_array_join
339340
}
340341

341342
struct TAggStateDesc {
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
-- name: test_array_join
2+
CREATE TABLE `student_score` (
3+
`id` bigint(20) NULL COMMENT "",
4+
`scores` array<int(11)> NULL COMMENT ""
5+
) ENGINE=OLAP
6+
DUPLICATE KEY(`id`)
7+
DISTRIBUTED BY HASH(`id`)
8+
PROPERTIES (
9+
"replication_num" = "1"
10+
);
11+
-- result:
12+
-- !result
13+
14+
INSERT INTO student_score VALUES
15+
(1, [80,85,87]),
16+
(2, [77, null, 89]),
17+
(3, null),
18+
(4, []),
19+
(5, [90,92]);
20+
-- result:
21+
-- !result
22+
23+
select id , c1 from student_score array join scores as c1 order by id,c1
24+
-- result:
25+
1 80
26+
1 85
27+
1 87
28+
2 None
29+
2 77
30+
2 89
31+
5 90
32+
5 92
33+
-- !result
34+
35+
select id , c1 from student_score left array join scores as c1 order by id,c1
36+
-- result:
37+
1 80
38+
1 85
39+
1 87
40+
2 None
41+
2 77
42+
2 89
43+
3 None
44+
4 None
45+
5 90
46+
5 92
47+
-- !result
48+
49+
select id ,c1 ,c2 from student_score array join scores as c1, scores as c2 order by id,c1
50+
-- result:
51+
1 80 80
52+
1 85 85
53+
1 87 87
54+
2 None None
55+
2 77 77
56+
2 89 89
57+
5 90 90
58+
5 92 92
59+
-- !result
60+
61+
select id ,c1 ,c2 from student_score left array join scores as c1, scores as c2 order by id,c1
62+
-- result:
63+
1 80 80
64+
1 85 85
65+
1 87 87
66+
2 None None
67+
2 77 77
68+
2 89 89
69+
3 None None
70+
4 None None
71+
5 90 90
72+
5 92 92
73+
-- !result
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
-- name: test_array_join
2+
3+
CREATE TABLE `student_score` (
4+
`id` bigint(20) NULL COMMENT "",
5+
`scores` array<int(11)> NULL COMMENT ""
6+
) ENGINE=OLAP
7+
DUPLICATE KEY(`id`)
8+
DISTRIBUTED BY HASH(`id`)
9+
PROPERTIES (
10+
"replication_num" = "1"
11+
);
12+
13+
INSERT INTO student_score VALUES
14+
(1, [80,85,87]),
15+
(2, [77, null, 89]),
16+
(3, null),
17+
(4, []),
18+
(5, [90,92]);
19+
20+
select id , c1 from student_score array join scores as c1 order by id,c1
21+
22+
select id , c1 from student_score left array join scores as c1 order by id,c1
23+
24+
select id ,c1 ,c2 from student_score array join scores as c1, scores as c2 order by id,c1
25+
26+
select id ,c1 ,c2 from student_score left array join scores as c1, scores as c2 order by id,c1
27+

0 commit comments

Comments
 (0)