Skip to content

Commit 19888e7

Browse files
authored
[Hotfix] fix http source can not read yyyy-MM-dd HH:mm:ss format bug & Improve DateTime Utils (#6601)
1 parent 66d8502 commit 19888e7

File tree

16 files changed

+795
-162
lines changed

16 files changed

+795
-162
lines changed

Diff for: seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateTimeUtils.java

+174-1
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,25 @@
1818
package org.apache.seatunnel.common.utils;
1919

2020
import java.time.Instant;
21+
import java.time.LocalDate;
2122
import java.time.LocalDateTime;
23+
import java.time.LocalTime;
2224
import java.time.ZoneId;
2325
import java.time.format.DateTimeFormatter;
26+
import java.time.format.DateTimeFormatterBuilder;
27+
import java.time.format.SignStyle;
28+
import java.time.temporal.TemporalAccessor;
29+
import java.time.temporal.TemporalQueries;
2430
import java.util.HashMap;
31+
import java.util.LinkedHashMap;
32+
import java.util.LinkedHashSet;
2533
import java.util.Map;
34+
import java.util.Set;
35+
import java.util.regex.Pattern;
36+
37+
import static java.time.temporal.ChronoField.DAY_OF_MONTH;
38+
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
39+
import static java.time.temporal.ChronoField.YEAR;
2640

2741
public class DateTimeUtils {
2842

@@ -48,6 +62,162 @@ public class DateTimeUtils {
4862
FORMATTER_MAP.put(
4963
Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601,
5064
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));
65+
FORMATTER_MAP.put(
66+
Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601,
67+
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601.value));
68+
FORMATTER_MAP.put(
69+
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601,
70+
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601.value));
71+
FORMATTER_MAP.put(
72+
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601,
73+
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601.value));
74+
}
75+
76+
// Formatters for datetime strings that are exactly 19 characters long, keyed by the regular
// expression describing the layout each one parses. Iteration follows insertion order.
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP =
        new LinkedHashMap<>();
// Entries of the map above, copied once by the static initializer below. Declared final so the
// lookup table cannot be swapped out after class initialization.
public static final Set<Map.Entry<Pattern, DateTimeFormatter>>
        YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// Formatters for datetime strings longer than 19 characters, keyed the same way.
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP =
        new LinkedHashMap<>();
// Entries of the map above, copied once by the static initializer below.
public static final Set<Map.Entry<Pattern, DateTimeFormatter>>
        YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// Formatter used for datetime strings that are exactly 14 characters long.
public static final DateTimeFormatter YYYY_MM_DD_HH_MM_SS_14_FORMATTER =
        DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT.value);

// Builds a case-insensitive formatter for "<year><sep><MM><sep><dd> <ISO_LOCAL_TIME>".
private static DateTimeFormatter separatedDateWithIsoTime(char separator) {
    return new DateTimeFormatterBuilder()
            .parseCaseInsensitive()
            .append(
                    new DateTimeFormatterBuilder()
                            .appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
                            .appendLiteral(separator)
                            .appendValue(MONTH_OF_YEAR, 2)
                            .appendLiteral(separator)
                            .appendValue(DAY_OF_MONTH, 2)
                            .toFormatter())
            .appendLiteral(' ')
            .append(DateTimeFormatter.ISO_LOCAL_TIME)
            .toFormatter();
}

static {
    // 19-character layouts. The insertion order (dash, ISO 'T', slash, dot) is the order in
    // which the patterns are tried, so it must not be changed casually.
    YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
            DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS.value));
    YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"),
            DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));
    YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
            DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SLASH.value));
    YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
            DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SPOT.value));

    // Layouts longer than 19 characters. The time part is delegated to ISO_LOCAL_TIME so that
    // a fraction of varying length is accepted. Insertion order (dash, ISO 'T', slash, dot,
    // Chinese) is again the lookup order.
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}.*"),
            new DateTimeFormatterBuilder()
                    .parseCaseInsensitive()
                    .append(DateTimeFormatter.ISO_LOCAL_DATE)
                    .appendLiteral(' ')
                    .append(DateTimeFormatter.ISO_LOCAL_TIME)
                    .toFormatter());
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}.*"),
            DateTimeFormatter.ISO_LOCAL_DATE_TIME);
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}.*"),
            separatedDateWithIsoTime('/'));
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*"),
            separatedDateWithIsoTime('.'));
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
            Pattern.compile("\\d{4}年\\d{2}月\\d{2}日\\s\\d{2}时\\d{2}分\\d{2}秒"),
            DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH时mm分ss秒"));

    // Copy the entries into the sets that matchDateTimeFormatter iterates over.
    YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET.addAll(
            YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.entrySet());
    YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET.addAll(
            YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.entrySet());
}
163+
164+
/**
165+
* gave a datetime string and return the {@link DateTimeFormatter} which can be used to parse
166+
* it.
167+
*
168+
* @param dateTime eg: 2020-02-03 12:12:10.101
169+
* @return the DateTimeFormatter matched, will return null when not matched any pattern
170+
*/
171+
public static DateTimeFormatter matchDateTimeFormatter(String dateTime) {
172+
if (dateTime.length() == 19) {
173+
for (Map.Entry<Pattern, DateTimeFormatter> entry :
174+
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET) {
175+
if (entry.getKey().matcher(dateTime).matches()) {
176+
return entry.getValue();
177+
}
178+
}
179+
} else if (dateTime.length() > 19) {
180+
for (Map.Entry<Pattern, DateTimeFormatter> entry :
181+
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET) {
182+
if (entry.getKey().matcher(dateTime).matches()) {
183+
return entry.getValue();
184+
}
185+
}
186+
} else if (dateTime.length() == 14) {
187+
return YYYY_MM_DD_HH_MM_SS_14_FORMATTER;
188+
}
189+
return null;
190+
}
191+
192+
public static LocalDateTime parse(String dateTime, DateTimeFormatter dateTimeFormatter) {
193+
TemporalAccessor parsedTimestamp = dateTimeFormatter.parse(dateTime);
194+
LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime());
195+
LocalDate localDate = parsedTimestamp.query(TemporalQueries.localDate());
196+
return LocalDateTime.of(localDate, localTime);
197+
}
198+
199+
/**
200+
* gave a datetime string and return {@link LocalDateTime}
201+
*
202+
* <p>Due to the need to determine the rules of the formatter through regular expressions, there
203+
* will be a certain performance loss. When tested on 8c16g macos, the most significant
204+
* performance decrease compared to directly passing the formatter is
205+
* 'Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*")' has increased from 4.5
206+
* seconds to 10 seconds in a scenario where 1000w calculations are performed.
207+
*
208+
* <p>Analysis shows that there are two main reasons: one is that the regular expression
209+
* position in the map is 4, before this, three regular expression matches are required.
210+
*
211+
* <p>Another reason is to support the length of non fixed millisecond bits (minimum 0, maximum
212+
* 9), we used {@link DateTimeFormatter#ISO_LOCAL_TIME}, which also increases the time for time
213+
* conversion.
214+
*
215+
* @param dateTime eg: 2020-02-03 12:12:10.101
216+
* @return {@link LocalDateTime}
217+
*/
218+
public static LocalDateTime parse(String dateTime) {
219+
DateTimeFormatter dateTimeFormatter = matchDateTimeFormatter(dateTime);
220+
return LocalDateTime.parse(dateTime, dateTimeFormatter);
51221
}
52222

53223
public static LocalDateTime parse(String dateTime, Formatter formatter) {
@@ -78,7 +248,10 @@ public enum Formatter {
78248
YYYY_MM_DD_HH_MM_SS_SPOT("yyyy.MM.dd HH:mm:ss"),
79249
YYYY_MM_DD_HH_MM_SS_SLASH("yyyy/MM/dd HH:mm:ss"),
80250
YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss"),
81-
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss");
251+
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss"),
252+
YYYY_MM_DD_HH_MM_SS_SSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSS"),
253+
YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"),
254+
YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS");
82255

83256
private final String value;
84257

Diff for: seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateUtils.java

+107
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,15 @@
1919

2020
import java.time.LocalDate;
2121
import java.time.format.DateTimeFormatter;
22+
import java.time.format.DateTimeFormatterBuilder;
23+
import java.time.format.SignStyle;
2224
import java.util.HashMap;
2325
import java.util.Map;
26+
import java.util.regex.Pattern;
27+
28+
import static java.time.temporal.ChronoField.DAY_OF_MONTH;
29+
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
30+
import static java.time.temporal.ChronoField.YEAR;
2431

2532
public class DateUtils {
2633
private static final Map<Formatter, DateTimeFormatter> FORMATTER_MAP = new HashMap<>();
@@ -36,6 +43,106 @@ public class DateUtils {
3643
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_SLASH.value));
3744
}
3845

46+
// Regular expressions for the supported date layouts, in the order they are tried.
public static final Pattern[] PATTERN_ARRAY =
        new Pattern[] {
            Pattern.compile("\\d{4}-\\d{2}-\\d{2}"),
            Pattern.compile("\\d{4}年\\d{2}月\\d{2}日"),
            Pattern.compile("\\d{4}/\\d{2}/\\d{2}"),
            Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}"),
            Pattern.compile("\\d{8}")
        };

// Maps each pattern in PATTERN_ARRAY to the formatter that parses strings of that layout.
public static final Map<Pattern, DateTimeFormatter> DATE_FORMATTER_MAP = new HashMap<>();

// Wraps a date formatter in a case-insensitive one, as every entry of DATE_FORMATTER_MAP is.
private static DateTimeFormatter caseInsensitive(DateTimeFormatter dateFormatter) {
    return new DateTimeFormatterBuilder()
            .parseCaseInsensitive()
            .append(dateFormatter)
            .toFormatter();
}

static {
    // yyyy-MM-dd
    DATE_FORMATTER_MAP.put(PATTERN_ARRAY[0], caseInsensitive(DateTimeFormatter.ISO_LOCAL_DATE));

    // Year, month and day followed by the Chinese markers 年, 月 and 日
    DATE_FORMATTER_MAP.put(
            PATTERN_ARRAY[1],
            caseInsensitive(
                    new DateTimeFormatterBuilder()
                            .appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
                            .appendLiteral("年")
                            .appendValue(MONTH_OF_YEAR, 2)
                            .appendLiteral("月")
                            .appendValue(DAY_OF_MONTH, 2)
                            .appendLiteral("日")
                            .toFormatter()));

    // yyyy/MM/dd
    DATE_FORMATTER_MAP.put(
            PATTERN_ARRAY[2],
            caseInsensitive(
                    new DateTimeFormatterBuilder()
                            .appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
                            .appendLiteral('/')
                            .appendValue(MONTH_OF_YEAR, 2)
                            .appendLiteral('/')
                            .appendValue(DAY_OF_MONTH, 2)
                            .toFormatter()));

    // yyyy.MM.dd
    DATE_FORMATTER_MAP.put(
            PATTERN_ARRAY[3],
            caseInsensitive(
                    new DateTimeFormatterBuilder()
                            .appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
                            .appendLiteral('.')
                            .appendValue(MONTH_OF_YEAR, 2)
                            .appendLiteral('.')
                            .appendValue(DAY_OF_MONTH, 2)
                            .toFormatter()));

    // yyyyMMdd (no separators between the three values)
    DATE_FORMATTER_MAP.put(
            PATTERN_ARRAY[4],
            caseInsensitive(
                    new DateTimeFormatterBuilder()
                            .appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
                            .appendValue(MONTH_OF_YEAR, 2)
                            .appendValue(DAY_OF_MONTH, 2)
                            .toFormatter()));
}
120+
121+
/**
122+
* gave a date string and return the {@link DateTimeFormatter} which can be used to parse it.
123+
*
124+
* @param dateTime eg: 2020-02-03
125+
* @return the DateTimeFormatter matched, will return null when not matched any pattern in
126+
* {@link #PATTERN_ARRAY}
127+
*/
128+
public static DateTimeFormatter matchDateFormatter(String dateTime) {
129+
for (int j = 0; j < PATTERN_ARRAY.length; j++) {
130+
if (PATTERN_ARRAY[j].matcher(dateTime).matches()) {
131+
return DATE_FORMATTER_MAP.get(PATTERN_ARRAY[j]);
132+
}
133+
}
134+
return null;
135+
}
136+
137+
public static LocalDate parse(String date) {
138+
DateTimeFormatter dateTimeFormatter = matchDateFormatter(date);
139+
return parse(date, dateTimeFormatter);
140+
}
141+
142+
public static LocalDate parse(String date, DateTimeFormatter dateTimeFormatter) {
143+
return LocalDate.parse(date, dateTimeFormatter);
144+
}
145+
39146
public static LocalDate parse(String date, Formatter formatter) {
40147
return LocalDate.parse(date, FORMATTER_MAP.get(formatter));
41148
}

0 commit comments

Comments
 (0)