Skip to content

Commit eae369b

Browse files
authored
[Improve][transform-v2] Support dynamic types for array function (#8139)
1 parent 9f02943 commit eae369b

File tree

9 files changed

+327
-34
lines changed

9 files changed

+327
-34
lines changed

docs/en/transform-v2/sql-functions.md

+9-2
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,15 @@ Generate an array.
998998

999999
Example:
10001000

1001-
select ARRAY('test1','test2','test3') as arrays
1002-
1001+
SELECT Array('c_1','c_2') as string_array,
1002+
Array(1.23,2.34) as double_array,
1003+
Array(1,2) as int_array,
1004+
Array(2147483648,2147483649) as long_array,
1005+
Array(1.23,2147483648) as double_array_1,
1006+
Array(1.23,2147483648,'c_1') as string_array_1
1007+
FROM fake
1008+
1009+
Note: Currently, only the string, double, long, and int types are supported.
10031010

10041011
### LATERAL VIEW
10051012
#### EXPLODE

docs/zh/transform-v2/sql-functions.md

+9-1
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,15 @@ select UUID() as seatunnel_uuid
991991

992992
示例:
993993

994-
select ARRAY('test1','test2','test3') as arrays
994+
SELECT Array('c_1','c_2') as string_array,
995+
Array(1.23,2.34) as double_array,
996+
Array(1,2) as int_array,
997+
Array(2147483648,2147483649) as long_array,
998+
Array(1.23,2147483648) as double_array_1,
999+
Array(1.23,2147483648,'c_1') as string_array_1
1000+
FROM fake
1001+
1002+
注意:目前仅支持string、double、long、int几种类型
9951003

9961004
### LATERAL VIEW
9971005
#### EXPLODE

seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/explode_transform.conf

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ sink{
9494
},
9595
{
9696
field_name = num
97-
field_type = string
97+
field_type = int
9898
field_value = [{equals_to = 1}]
9999
}
100100
]

seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/explode_transform_without_outer.conf

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ sink{
8686
},
8787
{
8888
field_name = num
89-
field_type = "string"
89+
field_type = "int"
9090
field_value = [{equals_to = 1}]
9191
}
9292
]

seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_array.conf

+65-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
###### This config file is a demonstration of batch processing in seatunnel config
1919
######
2020

21+
2122
env {
2223
job.mode = "BATCH"
2324
parallelism = 1
@@ -30,6 +31,7 @@ source {
3031
fields {
3132
pk_id = string
3233
name = string
34+
id = int
3335
}
3436
primaryKey {
3537
name = "pk_id"
@@ -39,22 +41,33 @@ source {
3941
rows = [
4042
{
4143
kind = INSERT
42-
fields = ["id001", "zhangsan,zhangsan"]
44+
fields = ["id001", "zhangsan,zhangsan",123]
4345
}
4446
]
4547
}
4648
}
4749

4850
transform {
4951
Sql {
50-
plugin_input = "fake"
51-
plugin_output = "fake1"
52-
query = "SELECT *,Array('c_1','c_2') as c_array FROM dual "
52+
plugin_output = "fake"
53+
query = """SELECT
54+
*,
55+
Array(pk_id,id) as field_array_1,
56+
Array(pk_id,'c_1') as field_array_2,
57+
Array(id,123) as field_array_3,
58+
Array('c_1','c_2') as string_array,
59+
Array(1.23,2.34) as double_array,
60+
Array(1,2) as int_array,
61+
Array(2147483648,2147483649) as long_array,
62+
Array(1.23,2147483648) as double_array_1,
63+
Array(1.23,2147483648,'c_1') as string_array_1
64+
FROM fake """
5365
}
5466
}
5567

5668
sink{
5769
assert {
70+
plugin_output = "fake"
5871
rules =
5972
{
6073
row_rules = [
@@ -79,12 +92,56 @@ sink{
7992
field_value = [{equals_to = "zhangsan,zhangsan"}]
8093
},
8194
{
82-
field_name = c_array
83-
field_type = array<string>
95+
field_name = id
96+
field_type = int
97+
field_value = [{equals_to = 123}]
98+
},
99+
{
100+
field_name = field_array_1
101+
field_type = array<STRING>
102+
field_value = [{equals_to = ["id001" ,"123"]}]
103+
},
104+
{
105+
field_name = field_array_2
106+
field_type = array<STRING>
107+
field_value = [{equals_to = ["id001" ,"c_1"]}]
108+
},
109+
{
110+
field_name = field_array_3
111+
field_type = array<INT>
112+
field_value = [{equals_to = [123 ,123]}]
113+
},
114+
{
115+
field_name = string_array
116+
field_type = array<STRING>
84117
field_value = [{equals_to = ["c_1" ,"c_2"]}]
85-
}
118+
},
119+
{
120+
field_name = double_array
121+
field_type = array<DOUBLE>
122+
field_value = [{equals_to = [1.23,2.34]}]
123+
},
124+
{
125+
field_name = int_array
126+
field_type = array<INT>
127+
field_value = [{equals_to = [1,2]}]
128+
},
129+
{
130+
field_name = long_array
131+
field_type = array<BIGINT>
132+
field_value = [{equals_to = [2147483648,2147483649]}]
133+
},
134+
{
135+
field_name = double_array_1
136+
field_type = array<DOUBLE>
137+
field_value = [{equals_to = [1.23,2147483648]}]
138+
},
139+
{
140+
field_name = string_array_1
141+
field_type = array<STRING>
142+
field_value = [{equals_to = ["1.23","2147483648","c_1"]}]
143+
}
86144
]
87145
}
88146
}
89147
}
90-

seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public class ZetaSQLEngine implements SQLEngine {
5959
private String inputTableName;
6060
@Nullable private String catalogTableName;
6161
private SeaTunnelRowType inputRowType;
62+
private SeaTunnelRowType outRowType;
6263

6364
private String sql;
6465
private PlainSelect selectBody;
@@ -216,10 +217,13 @@ public SeaTunnelRowType typeMapping(List<String> inputColumnsMapping) {
216217
}
217218
List<LateralView> lateralViews = selectBody.getLateralViews();
218219
if (CollectionUtils.isEmpty(lateralViews)) {
219-
return new SeaTunnelRowType(fieldNames, seaTunnelDataTypes);
220+
outRowType = new SeaTunnelRowType(fieldNames, seaTunnelDataTypes);
221+
} else {
222+
outRowType =
223+
zetaSQLFunction.lateralViewMapping(
224+
fieldNames, seaTunnelDataTypes, lateralViews, inputColumnsMapping);
220225
}
221-
return zetaSQLFunction.lateralViewMapping(
222-
fieldNames, seaTunnelDataTypes, lateralViews, inputColumnsMapping);
226+
return outRowType;
223227
}
224228

225229
private static String cleanEscape(String columnName) {

seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java

+11-18
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
2121
import org.apache.seatunnel.api.table.type.ArrayType;
22-
import org.apache.seatunnel.api.table.type.BasicType;
2322
import org.apache.seatunnel.api.table.type.DecimalType;
2423
import org.apache.seatunnel.api.table.type.MapType;
2524
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
@@ -29,6 +28,7 @@
2928
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
3029
import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException;
3130
import org.apache.seatunnel.transform.exception.TransformException;
31+
import org.apache.seatunnel.transform.sql.zeta.functions.ArrayFunction;
3232
import org.apache.seatunnel.transform.sql.zeta.functions.DateTimeFunction;
3333
import org.apache.seatunnel.transform.sql.zeta.functions.NumericFunction;
3434
import org.apache.seatunnel.transform.sql.zeta.functions.StringFunction;
@@ -192,6 +192,7 @@ public class ZetaSQLFunction {
192192
public static final String UUID = "UUID";
193193

194194
private final SeaTunnelRowType inputRowType;
195+
195196
private final ZetaSQLType zetaSQLType;
196197
private final ZetaSQLFilter zetaSQLFilter;
197198

@@ -552,7 +553,7 @@ public Object executeFunctionExpr(String functionName, List<Object> args) {
552553
case NULLIF:
553554
return SystemFunction.nullif(args);
554555
case ARRAY:
555-
return SystemFunction.array(args);
556+
return ArrayFunction.array(args);
556557
case UUID:
557558
return randomUUID().toString();
558559
default:
@@ -743,8 +744,7 @@ private List<SeaTunnelRow> explode(
743744
next,
744745
aliasFieldIndex,
745746
row,
746-
expression,
747-
true);
747+
expression);
748748
}
749749
seaTunnelRows = next;
750750
} else if (expression instanceof Function) {
@@ -758,8 +758,7 @@ private List<SeaTunnelRow> explode(
758758
next,
759759
aliasFieldIndex,
760760
row,
761-
expression,
762-
false);
761+
expression);
763762
}
764763
seaTunnelRows = next;
765764
}
@@ -774,8 +773,7 @@ private void transformExplodeValue(
774773
List<SeaTunnelRow> next,
775774
int aliasFieldIndex,
776775
SeaTunnelRow row,
777-
Expression expression,
778-
boolean keepValueType) {
776+
Expression expression) {
779777
if (splitFieldValue == null) {
780778
if (isUsingOuter) {
781779
next.add(
@@ -798,13 +796,9 @@ private void transformExplodeValue(
798796
if (!isUsingOuter && fieldValue == null) {
799797
continue;
800798
}
801-
Object value =
802-
fieldValue == null
803-
? null
804-
: (keepValueType ? fieldValue : String.valueOf(fieldValue));
805799
next.add(
806800
copySeaTunnelRowWithNewValue(
807-
outRowType.getTotalFields(), row, aliasFieldIndex, value));
801+
outRowType.getTotalFields(), row, aliasFieldIndex, fieldValue));
808802
}
809803
} else {
810804
throw new SeaTunnelRuntimeException(
@@ -865,14 +859,13 @@ public SeaTunnelRowType lateralViewMapping(
865859
seaTunnelDataTypes[columnIndex] = seaTunnelDataType;
866860
}
867861
} else {
868-
// default string type
869-
SeaTunnelDataType seaTunnelDataType =
870-
PhysicalColumn.of(alias, BasicType.STRING_TYPE, 10L, true, "", "")
871-
.getDataType();
862+
863+
ArrayType arrayType = (ArrayType) zetaSQLType.getExpressionType(expression);
864+
872865
if (aliasIndex == -1) {
873866
fieldNames = ArrayUtils.add(fieldNames, alias);
874867
seaTunnelDataTypes =
875-
ArrayUtils.add(seaTunnelDataTypes, seaTunnelDataType);
868+
ArrayUtils.add(seaTunnelDataTypes, arrayType.getElementType());
876869
inputColumnsMapping.add(alias);
877870
}
878871
}

seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.seatunnel.api.table.type.SqlType;
2929
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
3030
import org.apache.seatunnel.transform.exception.TransformException;
31+
import org.apache.seatunnel.transform.sql.zeta.functions.ArrayFunction;
3132

3233
import org.apache.commons.collections4.CollectionUtils;
3334

@@ -448,6 +449,7 @@ private SeaTunnelDataType<?> getFunctionType(Function function) {
448449
case ZetaSQLFunction.TRUNCATE:
449450
return BasicType.DOUBLE_TYPE;
450451
case ZetaSQLFunction.ARRAY:
452+
return ArrayFunction.castArrayTypeMapping(function, inputRowType);
451453
case ZetaSQLFunction.SPLIT:
452454
return ArrayType.STRING_ARRAY_TYPE;
453455
case ZetaSQLFunction.NOW:

0 commit comments

Comments
 (0)