Skip to content

Commit 1d8213c

Browse files
committed
Add support for legacy Date in Hive for Parquet
1 parent ad38de4 commit 1d8213c

File tree

13 files changed

+1081
-4
lines changed

13 files changed

+1081
-4
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.base.util;
15+
16+
import com.google.common.collect.Range;
17+
import com.google.common.collect.RangeMap;
18+
import com.google.common.collect.TreeRangeMap;
19+
20+
import java.text.ParseException;
21+
import java.text.SimpleDateFormat;
22+
import java.time.LocalDate;
23+
import java.util.Date;
24+
import java.util.GregorianCalendar;
25+
import java.util.Map;
26+
import java.util.Optional;
27+
import java.util.TimeZone;
28+
29+
import static java.time.ZoneOffset.UTC;
30+
import static java.util.concurrent.TimeUnit.DAYS;
31+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
32+
33+
public final class CalendarUtils
34+
{
35+
static final LocalDate GREGORIAN_START_DATE = LocalDate.of(1582, 10, 15);
36+
static final LocalDate JULIAN_END_DATE = LocalDate.of(1582, 10, 4);
37+
38+
private static final TimeZone TZ_UTC = TimeZone.getTimeZone(UTC);
39+
private static final String DATE_FORMAT = "yyyy-MM-dd";
40+
private static final String DATE_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
41+
42+
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> {
43+
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
44+
format.setCalendar(new GregorianCalendar(TZ_UTC));
45+
return format;
46+
});
47+
48+
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> {
49+
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT);
50+
format.setCalendar(new GregorianCalendar(TZ_UTC));
51+
return format;
52+
});
53+
54+
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> {
55+
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
56+
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC);
57+
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE));
58+
format.setCalendar(prolepticGregorianCalendar);
59+
return format;
60+
});
61+
62+
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> {
63+
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT);
64+
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC);
65+
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE));
66+
format.setCalendar(prolepticGregorianCalendar);
67+
return format;
68+
});
69+
70+
// https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar#Difference_between_Julian_and_proleptic_Gregorian_calendar_dates
71+
private static final RangeMap<Integer, Integer> julianGregorianDiffs = TreeRangeMap.create();
72+
73+
private static final int JULIAN_COMMON_ERA_START_DAY;
74+
private static final long LAST_SWITCH_JULIAN_DAY;
75+
76+
static {
77+
julianGregorianDiffs.put(Range.lessThan(-682945), 2);
78+
julianGregorianDiffs.put(Range.closedOpen(-682945, -646420), 1);
79+
julianGregorianDiffs.put(Range.closedOpen(-646420, -609895), -0);
80+
julianGregorianDiffs.put(Range.closedOpen(-609895, -536845), -1);
81+
julianGregorianDiffs.put(Range.closedOpen(-536845, -500320), -2);
82+
julianGregorianDiffs.put(Range.closedOpen(-500320, -463795), -3);
83+
julianGregorianDiffs.put(Range.closedOpen(-463795, -390745), -4);
84+
julianGregorianDiffs.put(Range.closedOpen(-390745, -354220), -5);
85+
julianGregorianDiffs.put(Range.closedOpen(-354220, -317695), -6);
86+
julianGregorianDiffs.put(Range.closedOpen(-317695, -244645), -7);
87+
julianGregorianDiffs.put(Range.closedOpen(-244645, -208120), -8);
88+
julianGregorianDiffs.put(Range.closedOpen(-208120, -171595), -9);
89+
julianGregorianDiffs.put(Range.closedOpen(-171595, -141427), -10);
90+
julianGregorianDiffs.put(Range.atLeast(-141427), 0);
91+
92+
try {
93+
JULIAN_COMMON_ERA_START_DAY = (int) MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("0001-01-01").getTime());
94+
LAST_SWITCH_JULIAN_DAY = MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("1582-10-15").getTime());
95+
}
96+
catch (ParseException e) {
97+
throw new RuntimeException(e);
98+
}
99+
}
100+
101+
private CalendarUtils() {}
102+
103+
public static int convertDaysToProlepticGregorian(int julianDays)
104+
{
105+
if (julianDays < JULIAN_COMMON_ERA_START_DAY) {
106+
return convertDaysToProlepticDaysInternal(julianDays);
107+
}
108+
else if (julianDays < LAST_SWITCH_JULIAN_DAY) {
109+
return Optional.ofNullable(julianGregorianDiffs.getEntry(julianDays)).map(Map.Entry::getValue).orElse(0) + julianDays;
110+
}
111+
return julianDays;
112+
}
113+
114+
private static int convertDaysToProlepticDaysInternal(int hybridDays)
115+
{
116+
long hybridMillis = DAYS.toMillis(hybridDays);
117+
String hybridDateInString = HYBRID_CALENDAR_DATE_FORMAT.get().format(new Date(hybridMillis));
118+
long result;
119+
try {
120+
result = PROLEPTIC_CALENDAR_DATE_FORMAT.get().parse(hybridDateInString).getTime();
121+
}
122+
catch (ParseException e) {
123+
throw new RuntimeException(e);
124+
}
125+
long prolepticMillis = result;
126+
return (int) MILLISECONDS.toDays(prolepticMillis);
127+
}
128+
129+
public static long convertTimestampToProlepticGregorian(long epochMillis)
130+
{
131+
String dateTimeInString = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis));
132+
try {
133+
return PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime();
134+
}
135+
catch (ParseException e) {
136+
throw new RuntimeException(e);
137+
}
138+
}
139+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,240 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.base.util;
15+
16+
import com.google.common.collect.ImmutableList;
17+
import com.google.common.collect.ImmutableMap;
18+
import org.junit.jupiter.api.Test;
19+
20+
import java.text.ParseException;
21+
import java.time.Instant;
22+
import java.time.LocalDate;
23+
import java.time.LocalDateTime;
24+
import java.util.Date;
25+
import java.util.Map;
26+
27+
import static io.trino.plugin.base.util.CalendarUtils.HYBRID_CALENDAR_DATE_TIME_FORMAT;
28+
import static io.trino.plugin.base.util.CalendarUtils.PROLEPTIC_CALENDAR_DATE_TIME_FORMAT;
29+
import static io.trino.plugin.base.util.CalendarUtils.convertDaysToProlepticGregorian;
30+
import static io.trino.plugin.base.util.CalendarUtils.convertTimestampToProlepticGregorian;
31+
import static java.time.ZoneOffset.UTC;
32+
import static java.util.concurrent.TimeUnit.DAYS;
33+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
34+
import static org.assertj.core.api.Assertions.assertThat;
35+
36+
class TestCalendarUtils
37+
{
38+
public static long convertTimestampToHybrid(long epochMillis)
39+
{
40+
LocalDateTime localDateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC);
41+
LocalDate localDate = localDateTime.toLocalDate();
42+
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) {
43+
localDateTime = LocalDateTime.of(CalendarUtils.GREGORIAN_START_DATE, localDateTime.toLocalTime());
44+
epochMillis = localDateTime.toInstant(UTC).toEpochMilli();
45+
}
46+
String dateTimeInString = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis));
47+
try {
48+
return HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime();
49+
}
50+
catch (ParseException e) {
51+
throw new RuntimeException(e);
52+
}
53+
}
54+
55+
static int convertProlepticDaysToHybridDays(int prolepticDays)
56+
{
57+
LocalDate localDate = LocalDate.ofEpochDay(prolepticDays);
58+
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) {
59+
localDate = CalendarUtils.GREGORIAN_START_DATE;
60+
}
61+
String dateInStr = CalendarUtils.PROLEPTIC_CALENDAR_DATE_FORMAT.get().format(new Date(DAYS.toMillis(localDate.toEpochDay())));
62+
return toHybridDaysFromString(dateInStr);
63+
}
64+
65+
static int toHybridDaysFromString(String date)
66+
{
67+
try {
68+
return (int) MILLISECONDS.toDays(CalendarUtils.HYBRID_CALENDAR_DATE_FORMAT.get().parse(date).getTime());
69+
}
70+
catch (ParseException e) {
71+
throw new RuntimeException(e);
72+
}
73+
}
74+
75+
@Test
76+
void testConvertGregorianDaysToAndFromHybridDays()
77+
{
78+
ImmutableList<String> dates = ImmutableList.of(
79+
"0001-01-01",
80+
"1000-01-01",
81+
"1582-10-04",
82+
"1582-10-15",
83+
"1788-09-10",
84+
"1888-12-31",
85+
"1969-12-31",
86+
"1970-01-01",
87+
"2024-03-30");
88+
89+
dates.forEach(date -> {
90+
int julianDays = toHybridDaysFromString(date);
91+
int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
92+
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(julianDays);
93+
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
94+
});
95+
}
96+
97+
@Test
98+
void testConvertHybridToProlepticDateForLeapYears()
99+
{
100+
ImmutableMap<String, String> dates = ImmutableMap.<String, String>builder()
101+
.put("0004-02-29", "0004-02-29")
102+
.put("0100-02-29", "0100-03-01")
103+
.put("0196-02-29", "0196-02-29")
104+
.put("0200-02-29", "0200-03-01")
105+
.put("0204-02-29", "0204-02-29")
106+
.put("0400-02-29", "0400-02-29")
107+
.put("1000-02-29", "1000-03-01")
108+
.put("1200-02-29", "1200-02-29")
109+
.put("1600-02-29", "1600-02-29")
110+
.put("1700-02-29", "1700-03-01")
111+
.put("2000-02-29", "2000-02-29")
112+
.buildOrThrow();
113+
114+
dates.forEach((julianDate, gregDate) -> {
115+
int julianDays = toHybridDaysFromString(julianDate);
116+
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay();
117+
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
118+
});
119+
}
120+
121+
@Test
122+
void testConvertDatesFromSwitchesBoarders()
123+
{
124+
ImmutableList<String> dates = ImmutableList.<String>builder()
125+
.add("0001-01-01")
126+
.add("0100-03-01")
127+
.add("0100-03-02")
128+
.add("0200-02-28")
129+
.add("0200-03-01")
130+
.add("0300-02-28")
131+
.add("0300-03-01")
132+
.add("0500-02-27")
133+
.add("0500-02-28")
134+
.add("0600-02-26")
135+
.add("0600-02-27")
136+
.add("0700-02-25")
137+
.add("0700-02-26")
138+
.add("0900-02-24")
139+
.add("0900-02-25")
140+
.add("1000-02-23")
141+
.add("1000-02-24")
142+
.add("1100-02-22")
143+
.add("1100-02-23")
144+
.add("1300-02-21")
145+
.add("1300-02-22")
146+
.add("1400-02-20")
147+
.add("1400-02-21")
148+
.add("1500-02-19")
149+
.add("1500-02-20")
150+
.add("1582-02-04")
151+
.build();
152+
153+
dates.forEach(date -> {
154+
int hybridDays = toHybridDaysFromString(date);
155+
int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
156+
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(hybridDays);
157+
assertThat(convertDaysToProlepticGregorian(hybridDays)).isEqualTo(gregorianDays);
158+
});
159+
}
160+
161+
@Test
162+
void testRebaseNotExistedDatesInHybridCalendar()
163+
{
164+
Map<String, String> dates = ImmutableMap.<String, String>builder()
165+
.put("1582-10-04", "1582-10-04")
166+
.put("1582-10-05", "1582-10-15")
167+
.put("1582-10-06", "1582-10-15")
168+
.put("1582-10-07", "1582-10-15")
169+
.put("1582-10-08", "1582-10-15")
170+
.put("1582-10-09", "1582-10-15")
171+
.put("1582-10-11", "1582-10-15")
172+
.put("1582-10-12", "1582-10-15")
173+
.put("1582-10-13", "1582-10-15")
174+
.put("1582-10-14", "1582-10-15")
175+
.put("1582-10-15", "1582-10-15")
176+
.buildOrThrow();
177+
178+
dates.forEach((gregDate, hybridDate) -> {
179+
int hybridDays = toHybridDaysFromString(hybridDate);
180+
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay();
181+
int actualHybridDays = convertProlepticDaysToHybridDays(gregorianDays);
182+
assertThat(actualHybridDays).isEqualTo(hybridDays);
183+
});
184+
}
185+
186+
@Test
187+
void testConvertGregorianTimestampToAndFromHybridDays()
188+
{
189+
ImmutableList<String> timestamps = ImmutableList.of(
190+
"0001-01-01 15:15:15.123",
191+
"1000-01-01 15:15:15.123",
192+
"1582-10-04 15:15:15.123",
193+
"1582-10-15 15:15:15.123",
194+
"1788-09-10 15:15:15.123",
195+
"1888-12-31 15:15:15.123",
196+
"1969-12-31 15:15:15.123",
197+
"1970-01-01 15:15:15.123",
198+
"2024-03-30 15:15:15.123");
199+
200+
timestamps.forEach(timestamp -> {
201+
try {
202+
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
203+
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
204+
assertThat(convertTimestampToProlepticGregorian(julianMillis)).isEqualTo(gregorianMillis);
205+
}
206+
catch (ParseException e) {
207+
throw new RuntimeException(e);
208+
}
209+
});
210+
}
211+
212+
@Test
213+
void testRebaseNotExistedTimestampInHybridCalendar()
214+
{
215+
Map<String, String> timestamps = ImmutableMap.<String, String>builder()
216+
.put("1582-10-04 15:15:15.123", "1582-10-04 15:15:15.123")
217+
.put("1582-10-05 15:15:15.123", "1582-10-15 15:15:15.123")
218+
.put("1582-10-06 15:15:15.123", "1582-10-15 15:15:15.123")
219+
.put("1582-10-07 15:15:15.123", "1582-10-15 15:15:15.123")
220+
.put("1582-10-08 15:15:15.123", "1582-10-15 15:15:15.123")
221+
.put("1582-10-09 15:15:15.123", "1582-10-15 15:15:15.123")
222+
.put("1582-10-11 15:15:15.123", "1582-10-15 15:15:15.123")
223+
.put("1582-10-12 15:15:15.123", "1582-10-15 15:15:15.123")
224+
.put("1582-10-13 15:15:15.123", "1582-10-15 15:15:15.123")
225+
.put("1582-10-14 15:15:15.123", "1582-10-15 15:15:15.123")
226+
.put("1582-10-15 15:15:15.123", "1582-10-15 15:15:15.123")
227+
.buildOrThrow();
228+
229+
timestamps.forEach((gregorianTmst, hybridTmst) -> {
230+
try {
231+
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(hybridTmst).getTime();
232+
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(gregorianTmst).getTime();
233+
assertThat(convertTimestampToHybrid(gregorianMillis)).isEqualTo(julianMillis);
234+
}
235+
catch (ParseException e) {
236+
throw new RuntimeException(e);
237+
}
238+
});
239+
}
240+
}

0 commit comments

Comments
 (0)