Skip to content

Commit f425c41

Browse files
committed
Add support for legacy Date in Hive for Parquet
1 parent ad38de4 commit f425c41

File tree

14 files changed

+1091
-1007
lines changed

14 files changed

+1091
-1007
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.base.util;
15+
16+
import com.google.common.collect.Range;
17+
import com.google.common.collect.RangeMap;
18+
import com.google.common.collect.TreeRangeMap;
19+
20+
import java.text.ParseException;
21+
import java.text.SimpleDateFormat;
22+
import java.time.LocalDate;
23+
import java.util.Date;
24+
import java.util.GregorianCalendar;
25+
import java.util.Map;
26+
import java.util.Optional;
27+
import java.util.TimeZone;
28+
29+
import static java.time.ZoneOffset.UTC;
30+
import static java.util.concurrent.TimeUnit.DAYS;
31+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
32+
33+
public final class CalendarUtils
34+
{
35+
static final LocalDate GREGORIAN_START_DATE = LocalDate.of(1582, 10, 15);
36+
static final LocalDate JULIAN_END_DATE = LocalDate.of(1582, 10, 4);
37+
38+
private static final TimeZone TZ_UTC = TimeZone.getTimeZone(UTC);
39+
private static final String DATE_FORMAT = "yyyy-MM-dd";
40+
private static final String DATE_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
41+
42+
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> {
43+
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
44+
format.setCalendar(new GregorianCalendar(TZ_UTC));
45+
return format;
46+
});
47+
48+
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> {
49+
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT);
50+
format.setCalendar(new GregorianCalendar(TZ_UTC));
51+
return format;
52+
});
53+
54+
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> {
55+
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
56+
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC);
57+
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE));
58+
format.setCalendar(prolepticGregorianCalendar);
59+
return format;
60+
});
61+
62+
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> {
63+
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT);
64+
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC);
65+
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE));
66+
format.setCalendar(prolepticGregorianCalendar);
67+
return format;
68+
});
69+
70+
// https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar#Difference_between_Julian_and_proleptic_Gregorian_calendar_dates
71+
private static final RangeMap<Integer, Integer> julianGregorianDiffs = TreeRangeMap.create();
72+
// The start day of Common Era (CE) ('0001-01-01') in Julian calendar.
73+
private static final int JULIAN_COMMON_ERA_START_DAY;
74+
75+
// 15-10-1582
76+
public static final long LAST_SWITCH_JULIAN_DAY;
77+
78+
static {
79+
julianGregorianDiffs.put(Range.lessThan(-682945), 2);
80+
julianGregorianDiffs.put(Range.closedOpen(-682945, -646420), 1);
81+
julianGregorianDiffs.put(Range.closedOpen(-646420, -609895), -0);
82+
julianGregorianDiffs.put(Range.closedOpen(-609895, -536845), -1);
83+
julianGregorianDiffs.put(Range.closedOpen(-536845, -500320), -2);
84+
julianGregorianDiffs.put(Range.closedOpen(-500320, -463795), -3);
85+
julianGregorianDiffs.put(Range.closedOpen(-463795, -390745), -4);
86+
julianGregorianDiffs.put(Range.closedOpen(-390745, -354220), -5);
87+
julianGregorianDiffs.put(Range.closedOpen(-354220, -317695), -6);
88+
julianGregorianDiffs.put(Range.closedOpen(-317695, -244645), -7);
89+
julianGregorianDiffs.put(Range.closedOpen(-244645, -208120), -8);
90+
julianGregorianDiffs.put(Range.closedOpen(-208120, -171595), -9);
91+
julianGregorianDiffs.put(Range.closedOpen(-171595, -141427), -10);
92+
julianGregorianDiffs.put(Range.atLeast(-141427), 0);
93+
94+
try {
95+
JULIAN_COMMON_ERA_START_DAY = (int) MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("0001-01-01").getTime());
96+
LAST_SWITCH_JULIAN_DAY = MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("1582-10-15").getTime());
97+
}
98+
catch (ParseException e) {
99+
throw new RuntimeException(e);
100+
}
101+
}
102+
103+
// Holder of static helpers only; the private constructor prevents instantiation.
private CalendarUtils() {}
104+
105+
public static int convertDaysToProlepticGregorian(int julianDays)
106+
{
107+
if (julianDays < JULIAN_COMMON_ERA_START_DAY) {
108+
return convertDaysToProlepticDaysInternal(julianDays);
109+
}
110+
else if (julianDays < LAST_SWITCH_JULIAN_DAY) {
111+
return Optional.ofNullable(julianGregorianDiffs.getEntry(julianDays)).map(Map.Entry::getValue).orElse(0) + julianDays;
112+
}
113+
return julianDays;
114+
}
115+
116+
private static int convertDaysToProlepticDaysInternal(int hybridDays)
117+
{
118+
long hybridMillis = DAYS.toMillis(hybridDays);
119+
String hybridDateInString = HYBRID_CALENDAR_DATE_FORMAT.get().format(new Date(hybridMillis));
120+
long result;
121+
try {
122+
result = PROLEPTIC_CALENDAR_DATE_FORMAT.get().parse(hybridDateInString).getTime();
123+
}
124+
catch (ParseException e) {
125+
throw new RuntimeException(e);
126+
}
127+
long prolepticMillis = result;
128+
return (int) MILLISECONDS.toDays(prolepticMillis);
129+
}
130+
131+
public static long convertTimestampToProlepticGregorian(long epochMillis)
132+
{
133+
String dateTimeInString = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis));
134+
try {
135+
return PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime();
136+
}
137+
catch (ParseException e) {
138+
throw new RuntimeException(e);
139+
}
140+
}
141+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.base.util;
15+
16+
import com.google.common.collect.ImmutableList;
17+
import com.google.common.collect.ImmutableMap;
18+
import org.junit.jupiter.api.Test;
19+
20+
import java.text.ParseException;
21+
import java.time.Instant;
22+
import java.time.LocalDate;
23+
import java.time.LocalDateTime;
24+
import java.util.Date;
25+
import java.util.Map;
26+
27+
import static io.trino.plugin.base.util.CalendarUtils.HYBRID_CALENDAR_DATE_TIME_FORMAT;
28+
import static io.trino.plugin.base.util.CalendarUtils.PROLEPTIC_CALENDAR_DATE_TIME_FORMAT;
29+
import static io.trino.plugin.base.util.CalendarUtils.convertDaysToProlepticGregorian;
30+
import static io.trino.plugin.base.util.CalendarUtils.convertTimestampToProlepticGregorian;
31+
import static java.time.ZoneOffset.UTC;
32+
import static java.util.concurrent.TimeUnit.DAYS;
33+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
34+
import static org.assertj.core.api.Assertions.assertThat;
35+
36+
class TestCalendarUtils
37+
{
38+
public static long convertTimestampToHybrid(long epochMillis)
39+
{
40+
LocalDateTime localDateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC);
41+
LocalDate localDate = localDateTime.toLocalDate();
42+
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) {
43+
localDateTime = LocalDateTime.of(CalendarUtils.GREGORIAN_START_DATE, localDateTime.toLocalTime());
44+
epochMillis = localDateTime.toInstant(UTC).toEpochMilli();
45+
}
46+
String dateTimeInString = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis));
47+
try {
48+
return HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime();
49+
}
50+
catch (ParseException e) {
51+
throw new RuntimeException(e);
52+
}
53+
}
54+
55+
static int convertProlepticDaysToHybridDays(int prolepticDays)
56+
{
57+
LocalDate localDate = LocalDate.ofEpochDay(prolepticDays);
58+
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) {
59+
localDate = CalendarUtils.GREGORIAN_START_DATE;
60+
}
61+
String dateInStr = CalendarUtils.PROLEPTIC_CALENDAR_DATE_FORMAT.get().format(new Date(DAYS.toMillis(localDate.toEpochDay())));
62+
return toHybridDaysFromString(dateInStr);
63+
}
64+
65+
static int toHybridDaysFromString(String date)
66+
{
67+
try {
68+
return (int) MILLISECONDS.toDays(CalendarUtils.HYBRID_CALENDAR_DATE_FORMAT.get().parse(date).getTime());
69+
}
70+
catch (ParseException e) {
71+
throw new RuntimeException(e);
72+
}
73+
}
74+
75+
@Test
76+
void testConvertGregorianDaysToAndFromHybridDays()
77+
{
78+
ImmutableList<String> dates = ImmutableList.of(
79+
"0001-01-01",
80+
"1000-01-01",
81+
"1582-10-04",
82+
"1582-10-15",
83+
"1788-09-10",
84+
"1888-12-31",
85+
"1969-12-31",
86+
"1970-01-01",
87+
"2024-03-30");
88+
89+
dates.forEach(date -> {
90+
int julianDays = toHybridDaysFromString(date);
91+
int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
92+
93+
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(julianDays);
94+
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
95+
});
96+
}
97+
98+
@Test
99+
void testConvertHybridToProlepticDateForLeapYears()
100+
{
101+
ImmutableMap<String, String> dates = ImmutableMap.<String, String>builder()
102+
.put("0004-02-29", "0004-02-29")
103+
.put("0100-02-29", "0100-03-01")
104+
.put("0196-02-29", "0196-02-29")
105+
.put("0200-02-29", "0200-03-01")
106+
.put("0204-02-29", "0204-02-29")
107+
.put("0400-02-29", "0400-02-29")
108+
.put("1000-02-29", "1000-03-01")
109+
.put("1200-02-29", "1200-02-29")
110+
.put("1600-02-29", "1600-02-29")
111+
.put("1700-02-29", "1700-03-01")
112+
.put("2000-02-29", "2000-02-29")
113+
.buildOrThrow();
114+
115+
dates.forEach((julianDate, gregDate) -> {
116+
int julianDays = toHybridDaysFromString(julianDate);
117+
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay();
118+
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
119+
});
120+
}
121+
122+
@Test
123+
void testConvertDatesFromSwitchesBoarders()
124+
{
125+
ImmutableList<String> dates = ImmutableList.<String>builder()
126+
.add("0001-01-01")
127+
.add("0100-03-01")
128+
.add("0100-03-02")
129+
.add("0200-02-28")
130+
.add("0200-03-01")
131+
.add("0300-02-28")
132+
.add("0300-03-01")
133+
.add("0500-02-27")
134+
.add("0500-02-28")
135+
.add("0600-02-26")
136+
.add("0600-02-27")
137+
.add("0700-02-25")
138+
.add("0700-02-26")
139+
.add("0900-02-24")
140+
.add("0900-02-25")
141+
.add("1000-02-23")
142+
.add("1000-02-24")
143+
.add("1100-02-22")
144+
.add("1100-02-23")
145+
.add("1300-02-21")
146+
.add("1300-02-22")
147+
.add("1400-02-20")
148+
.add("1400-02-21")
149+
.add("1500-02-19")
150+
.add("1500-02-20")
151+
.add("1582-02-04")
152+
.build();
153+
154+
dates.forEach(date -> {
155+
int hybridDays = toHybridDaysFromString(date);
156+
int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
157+
158+
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(hybridDays);
159+
assertThat(convertDaysToProlepticGregorian(hybridDays)).isEqualTo(gregorianDays);
160+
});
161+
}
162+
163+
@Test
164+
void testRebaseNotExistedDatesInHybridCalendar()
165+
{
166+
Map<String, String> dates = ImmutableMap.<String, String>builder()
167+
.put("1582-10-04", "1582-10-04")
168+
.put("1582-10-05", "1582-10-15")
169+
.put("1582-10-06", "1582-10-15")
170+
.put("1582-10-07", "1582-10-15")
171+
.put("1582-10-08", "1582-10-15")
172+
.put("1582-10-09", "1582-10-15")
173+
.put("1582-10-11", "1582-10-15")
174+
.put("1582-10-12", "1582-10-15")
175+
.put("1582-10-13", "1582-10-15")
176+
.put("1582-10-14", "1582-10-15")
177+
.put("1582-10-15", "1582-10-15")
178+
.buildOrThrow();
179+
180+
dates.forEach((gregDate, hybridDate) -> {
181+
int hybridDays = toHybridDaysFromString(hybridDate);
182+
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay();
183+
// int actualHybridDays = convertDaysToHybridCalendar(gregorianDays);
184+
int actualHybridDays = convertProlepticDaysToHybridDays(gregorianDays);
185+
assertThat(actualHybridDays).isEqualTo(hybridDays);
186+
});
187+
}
188+
189+
@Test
190+
void testConvertGregorianTimestampToAndFromHybridDays()
191+
{
192+
ImmutableList<String> timestamps = ImmutableList.of(
193+
"0001-01-01 15:15:15.123",
194+
"1000-01-01 15:15:15.123",
195+
"1582-10-04 15:15:15.123",
196+
"1582-10-15 15:15:15.123",
197+
"1788-09-10 15:15:15.123",
198+
"1888-12-31 15:15:15.123",
199+
"1969-12-31 15:15:15.123",
200+
"1970-01-01 15:15:15.123",
201+
"2024-03-30 15:15:15.123");
202+
203+
timestamps.forEach(timestamp -> {
204+
try {
205+
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
206+
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
207+
assertThat(convertTimestampToProlepticGregorian(julianMillis)).isEqualTo(gregorianMillis);
208+
}
209+
catch (ParseException e) {
210+
throw new RuntimeException(e);
211+
}
212+
});
213+
}
214+
215+
@Test
216+
void testRebaseNotExistedTimestampInHybridCalendar()
217+
{
218+
Map<String, String> timestamps = ImmutableMap.<String, String>builder()
219+
.put("1582-10-04 15:15:15.123", "1582-10-04 15:15:15.123")
220+
.put("1582-10-05 15:15:15.123", "1582-10-15 15:15:15.123")
221+
.put("1582-10-06 15:15:15.123", "1582-10-15 15:15:15.123")
222+
.put("1582-10-07 15:15:15.123", "1582-10-15 15:15:15.123")
223+
.put("1582-10-08 15:15:15.123", "1582-10-15 15:15:15.123")
224+
.put("1582-10-09 15:15:15.123", "1582-10-15 15:15:15.123")
225+
.put("1582-10-11 15:15:15.123", "1582-10-15 15:15:15.123")
226+
.put("1582-10-12 15:15:15.123", "1582-10-15 15:15:15.123")
227+
.put("1582-10-13 15:15:15.123", "1582-10-15 15:15:15.123")
228+
.put("1582-10-14 15:15:15.123", "1582-10-15 15:15:15.123")
229+
.put("1582-10-15 15:15:15.123", "1582-10-15 15:15:15.123")
230+
.buildOrThrow();
231+
232+
timestamps.forEach((gregorianTmst, hybridTmst) -> {
233+
try {
234+
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(hybridTmst).getTime();
235+
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(gregorianTmst).getTime();
236+
assertThat(convertTimestampToHybrid(gregorianMillis)).isEqualTo(julianMillis);
237+
238+
// int hybridDays = toHybridDaysFromString(hybridDate);
239+
// int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay();
240+
// int actualHybridDays = convertDaysToHybridCalendar(gregorianDays);
241+
// assertThat(actualHybridDays).isEqualTo(hybridDays);
242+
}
243+
catch (ParseException e) {
244+
throw new RuntimeException(e);
245+
}
246+
});
247+
}
248+
}

0 commit comments

Comments
 (0)