31
31
import org .apache .seatunnel .common .utils .TimeUtils ;
32
32
import org .apache .seatunnel .format .text .constant .TextFormatConstant ;
33
33
import org .apache .seatunnel .format .text .exception .SeaTunnelTextFormatException ;
34
+ import org .apache .seatunnel .format .text .splitor .DefaultTextLineSplitor ;
35
+ import org .apache .seatunnel .format .text .splitor .TextLineSplitor ;
34
36
35
37
import org .apache .commons .lang3 .StringUtils ;
36
38
@@ -50,20 +52,23 @@ public class TextDeserializationSchema implements DeserializationSchema<SeaTunne
50
52
private final DateTimeUtils .Formatter dateTimeFormatter ;
51
53
private final TimeUtils .Formatter timeFormatter ;
52
54
private final String encoding ;
55
+ private final TextLineSplitor splitor ;
53
56
54
57
/**
 * Stores the fully resolved settings of a text deserialization schema.
 *
 * <p>The constructor is private; {@link Builder#build()} is the caller that supplies
 * these values.
 *
 * @param seaTunnelRowType row type describing the fields of the produced rows
 * @param separators field separators, indexed by nesting level when a line is split
 * @param dateFormatter formatter for date values
 * @param dateTimeFormatter formatter for date-time values
 * @param timeFormatter formatter for time values
 * @param encoding charset name (the builder defaults to UTF-8)
 * @param splitor strategy that splits a line into fields for a given separator
 */
private TextDeserializationSchema(
        @NonNull SeaTunnelRowType seaTunnelRowType,
        String[] separators,
        DateUtils.Formatter dateFormatter,
        DateTimeUtils.Formatter dateTimeFormatter,
        TimeUtils.Formatter timeFormatter,
        String encoding,
        TextLineSplitor splitor) {
    // Row layout and how a raw line is cut into fields.
    this.seaTunnelRowType = seaTunnelRowType;
    this.splitor = splitor;
    this.separators = separators;
    this.encoding = encoding;

    // Temporal formatters.
    this.timeFormatter = timeFormatter;
    this.dateTimeFormatter = dateTimeFormatter;
    this.dateFormatter = dateFormatter;
}
68
73
69
74
public static Builder builder () {
@@ -78,6 +83,7 @@ public static class Builder {
78
83
DateTimeUtils .Formatter .YYYY_MM_DD_HH_MM_SS ;
79
84
private TimeUtils .Formatter timeFormatter = TimeUtils .Formatter .HH_MM_SS ;
80
85
private String encoding = StandardCharsets .UTF_8 .name ();
86
+ private TextLineSplitor textLineSplitor = new DefaultTextLineSplitor ();
81
87
82
88
private Builder () {}
83
89
@@ -116,14 +122,20 @@ public Builder encoding(String encoding) {
116
122
return this ;
117
123
}
118
124
125
+ public Builder textLineSplitor (TextLineSplitor splitor ) {
126
+ this .textLineSplitor = splitor ;
127
+ return this ;
128
+ }
129
+
119
130
public TextDeserializationSchema build () {
120
131
return new TextDeserializationSchema (
121
132
seaTunnelRowType ,
122
133
separators ,
123
134
dateFormatter ,
124
135
dateTimeFormatter ,
125
136
timeFormatter ,
126
- encoding );
137
+ encoding ,
138
+ textLineSplitor );
127
139
}
128
140
}
129
141
@@ -145,7 +157,7 @@ public SeaTunnelDataType<SeaTunnelRow> getProducedType() {
145
157
146
158
private Map <Integer , String > splitLineBySeaTunnelRowType (
147
159
String line , SeaTunnelRowType seaTunnelRowType , int level ) {
148
- String [] splits = line . split ( separators [level ], - 1 );
160
+ String [] splits = splitor . spliteLine ( line , separators [level ]);
149
161
LinkedHashMap <Integer , String > splitsMap = new LinkedHashMap <>();
150
162
SeaTunnelDataType <?>[] fieldTypes = seaTunnelRowType .getFieldTypes ();
151
163
for (int i = 0 ; i < splits .length ; i ++) {
0 commit comments