
Commit 2381504

HIVE-29383: Iceberg: [V3] Add support for timestamp with nanosecond precision
1 parent 90bf407 commit 2381504

File tree

23 files changed: +452 additions, -30 deletions


iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java

Lines changed: 10 additions & 0 deletions
@@ -27,6 +27,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TimestampTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.expressions.Expressions;
@@ -130,6 +132,10 @@ Type convertType(TypeInfo typeInfo, String defaultValue) {
       case STRING:
         return Types.StringType.get();
       case TIMESTAMP:
+        TimestampTypeInfo ts = (TimestampTypeInfo) typeInfo;
+        if (ts.getPrecision() == 9) {
+          return Types.TimestampNanoType.withoutZone();
+        }
         return Types.TimestampType.withoutZone();
       case DATE:
         return Types.DateType.get();
@@ -141,6 +147,10 @@ Type convertType(TypeInfo typeInfo, String defaultValue) {
       default:
         // special case for Timestamp with Local TZ which is only available in Hive3
         if ("TIMESTAMPLOCALTZ".equalsIgnoreCase(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().name())) {
+          TimestampLocalTZTypeInfo tz = (TimestampLocalTZTypeInfo) typeInfo;
+          if (tz.getPrecision() == 9) {
+            return Types.TimestampNanoType.withZone();
+          }
           return Types.TimestampType.withZone();
         }
         throw new IllegalArgumentException("Unsupported Hive type (" +
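
The essence of this change is that the converter now keys off the declared Hive precision when picking the Iceberg type. A minimal standalone sketch of that mapping (the wrapper class and method name are illustrative, not part of the patch):

import org.apache.hadoop.hive.serde2.typeinfo.TimestampTypeInfo;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

class TimestampMappingSketch {
  // Hive timestamp(9) maps to Iceberg's V3 nanosecond timestamp; any other
  // precision keeps the existing microsecond type.
  static Type resolveTimestampType(TimestampTypeInfo typeInfo) {
    return typeInfo.getPrecision() == 9
        ? Types.TimestampNanoType.withoutZone()
        : Types.TimestampType.withoutZone();
  }
}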

iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java

Lines changed: 6 additions & 0 deletions
@@ -379,6 +379,12 @@ public static String convertToTypeString(Type type) {
           return "timestamp with local time zone";
         }
         return "timestamp";
+      case TIMESTAMP_NANO:
+        Types.TimestampNanoType timestampNanoType = (Types.TimestampNanoType) type;
+        if (timestampNanoType.shouldAdjustToUTC()) {
+          return "timestamp with local time zone(9)";
+        }
+        return "timestamp(9)";
       case FIXED:
       case BINARY:
         return "binary";
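
This is the reverse direction of the schema conversion: Iceberg nanosecond timestamps come back as Hive type strings with an explicit precision. A small self-contained sketch, assuming only the Iceberg Types API already used in the diff (the wrapper class and method names are illustrative):

import org.apache.iceberg.types.Types;

class TypeStringSketch {
  // shouldAdjustToUTC() distinguishes the zoned nano type from the unzoned one.
  static String toHiveTypeString(Types.TimestampNanoType type) {
    return type.shouldAdjustToUTC() ? "timestamp with local time zone(9)" : "timestamp(9)";
  }

  public static void main(String[] args) {
    System.out.println(toHiveTypeString(Types.TimestampNanoType.withZone()));
    System.out.println(toHiveTypeString(Types.TimestampNanoType.withoutZone()));
  }
}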

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java

Lines changed: 5 additions & 0 deletions
@@ -133,6 +133,11 @@ public ObjectInspector primitive(Type.PrimitiveType primitiveType) {
       case TIMESTAMP:
         boolean adjustToUTC = ((Types.TimestampType) primitiveType).shouldAdjustToUTC();
         return adjustToUTC ? TIMESTAMP_INSPECTOR_WITH_TZ : TIMESTAMP_INSPECTOR;
+      case TIMESTAMP_NANO:
+        boolean adjustUTC = ((Types.TimestampNanoType) primitiveType).shouldAdjustToUTC();
+        return adjustUTC ?
+            IcebergTimestampWithZoneObjectInspectorHive3.get(9) :
+            IcebergTimestampObjectInspectorHive3.get(9);
       case TIME:
         return IcebergTimeObjectInspector.get();
       default:

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java

Lines changed: 15 additions & 3 deletions
@@ -26,20 +26,32 @@
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;


 public class IcebergTimestampObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector
     implements TimestampObjectInspector, WriteObjectInspector {

-  private static final IcebergTimestampObjectInspectorHive3 INSTANCE = new IcebergTimestampObjectInspectorHive3();
+  private static final IcebergTimestampObjectInspectorHive3 INSTANCE =
+      new IcebergTimestampObjectInspectorHive3(TypeInfoFactory.timestampTypeInfo);
+
+  private static final IcebergTimestampObjectInspectorHive3 NANO_INSTANCE =
+      new IcebergTimestampObjectInspectorHive3(TypeInfoFactory.nanoTimestampTypeInfo);

   public static IcebergTimestampObjectInspectorHive3 get() {
     return INSTANCE;
   }

-  private IcebergTimestampObjectInspectorHive3() {
-    super(TypeInfoFactory.timestampTypeInfo);
+  public static IcebergTimestampObjectInspectorHive3 get(int precision) {
+    if (precision == 9) {
+      return NANO_INSTANCE;
+    }
+    return INSTANCE;
+  }
+
+  private IcebergTimestampObjectInspectorHive3(PrimitiveTypeInfo typeInfo) {
+    super(typeInfo);
   }

   @Override
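
A short usage sketch of the new precision-aware factory (the wrapper class here is illustrative; the singletons and factory methods are the ones added above):

import org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3;

class InspectorFactorySketch {
  public static void main(String[] args) {
    // get() keeps the old microsecond behaviour; get(9) returns the cached
    // nanosecond singleton backed by TypeInfoFactory.nanoTimestampTypeInfo.
    IcebergTimestampObjectInspectorHive3 micros = IcebergTimestampObjectInspectorHive3.get();
    IcebergTimestampObjectInspectorHive3 nanos = IcebergTimestampObjectInspectorHive3.get(9);
    System.out.println(micros == nanos);  // false: two distinct cached instances
  }
}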

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspectorHive3.java

Lines changed: 14 additions & 3 deletions
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

@@ -33,14 +34,24 @@ public class IcebergTimestampWithZoneObjectInspectorHive3 extends AbstractPrimit
     implements TimestampLocalTZObjectInspector, WriteObjectInspector {

   private static final IcebergTimestampWithZoneObjectInspectorHive3 INSTANCE =
-      new IcebergTimestampWithZoneObjectInspectorHive3();
+      new IcebergTimestampWithZoneObjectInspectorHive3(TypeInfoFactory.timestampLocalTZTypeInfo);
+
+  private static final IcebergTimestampWithZoneObjectInspectorHive3 NANO_INSTANCE =
+      new IcebergTimestampWithZoneObjectInspectorHive3(TypeInfoFactory.timestampNanoLocalTZTypeInfo);

   public static IcebergTimestampWithZoneObjectInspectorHive3 get() {
     return INSTANCE;
   }

-  private IcebergTimestampWithZoneObjectInspectorHive3() {
-    super(TypeInfoFactory.timestampLocalTZTypeInfo);
+  public static IcebergTimestampWithZoneObjectInspectorHive3 get(int precision) {
+    if (precision == 9) {
+      return NANO_INSTANCE;
+    }
+    return INSTANCE;
+  }
+
+  private IcebergTimestampWithZoneObjectInspectorHive3(PrimitiveTypeInfo typeInfo) {
+    super(typeInfo);
   }

   @Override
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+-- Mask random uuid
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/
+-- Mask random snapshot id
+--! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/('current-snapshot-timestamp-ms'=')\d+/$1#Masked#/
+-- Mask iceberg version
+--! qt:replace:/("iceberg-version":")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))/$1#Masked#/
+-- Mask added-files-size
+--! qt:replace:/(\S\"added-files-size":")(\d+)(")/$1#Masked#$3/
+-- Mask total-files-size
+--! qt:replace:/(\S\"total-files-size":")(\d+)(")/$1#Masked#$3/
+
+CREATE TABLE t (
+ts_us timestamp,
+ts_ns timestamp(9),
+ts_tz_us timestamp with local time zone,
+ts_tz_ns timestamp with local time zone(9)
+)
+STORED BY ICEBERG
+TBLPROPERTIES ('format-version'='3');
+
+INSERT INTO t VALUES (
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789'
+);
+
+SELECT ts_ns FROM t ORDER BY ts_ns;
+SELECT ts_tz_ns FROM t ORDER BY ts_tz_ns;
+SELECT CAST(ts_ns AS STRING) FROM t;
+SELECT CAST(ts_tz_ns AS STRING) FROM t;
+
+SELECT * FROM t;
+
+CREATE TABLE tgt STORED BY ICEBERG TBLPROPERTIES ('format-version'='3') AS SELECT * FROM t;
+
+SELECT * FROM tgt;
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
+PREHOOK: query: CREATE TABLE t (
+ts_us timestamp,
+ts_ns timestamp(9),
+ts_tz_us timestamp with local time zone,
+ts_tz_ns timestamp with local time zone(9)
+)
+STORED BY ICEBERG
+TBLPROPERTIES ('format-version'='3')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: CREATE TABLE t (
+ts_us timestamp,
+ts_ns timestamp(9),
+ts_tz_us timestamp with local time zone,
+ts_tz_ns timestamp with local time zone(9)
+)
+STORED BY ICEBERG
+TBLPROPERTIES ('format-version'='3')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: INSERT INTO t VALUES (
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789'
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: INSERT INTO t VALUES (
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789',
+'2025-12-18 10:15:30.123456789'
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+PREHOOK: query: SELECT ts_ns FROM t ORDER BY ts_ns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT ts_ns FROM t ORDER BY ts_ns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456789
+PREHOOK: query: SELECT ts_tz_ns FROM t ORDER BY ts_tz_ns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT ts_tz_ns FROM t ORDER BY ts_tz_ns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456789 US/Pacific
+PREHOOK: query: SELECT CAST(ts_ns AS STRING) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT CAST(ts_ns AS STRING) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456789
+PREHOOK: query: SELECT CAST(ts_tz_ns AS STRING) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT CAST(ts_tz_ns AS STRING) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456789 US/Pacific
+PREHOOK: query: SELECT * FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456	2025-12-18 10:15:30.123456789	2025-12-18 10:15:30.123456 US/Pacific	2025-12-18 10:15:30.123456789 US/Pacific
+PREHOOK: query: CREATE TABLE tgt STORED BY ICEBERG TBLPROPERTIES ('format-version'='3') AS SELECT * FROM t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tgt
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: CREATE TABLE tgt STORED BY ICEBERG TBLPROPERTIES ('format-version'='3') AS SELECT * FROM t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tgt
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: Lineage: tgt.ts_ns SIMPLE [(t)t.FieldSchema(name:ts_ns, type:timestamp(9), comment:null), ]
+POSTHOOK: Lineage: tgt.ts_tz_ns SIMPLE [(t)t.FieldSchema(name:ts_tz_ns, type:timestamp with local time zone(9), comment:null), ]
+POSTHOOK: Lineage: tgt.ts_tz_us SIMPLE [(t)t.FieldSchema(name:ts_tz_us, type:timestamp with local time zone, comment:null), ]
+POSTHOOK: Lineage: tgt.ts_us SIMPLE [(t)t.FieldSchema(name:ts_us, type:timestamp, comment:null), ]
+PREHOOK: query: SELECT * FROM tgt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tgt
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM tgt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tgt
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2025-12-18 10:15:30.123456	2025-12-18 10:15:30.123456789	2025-12-18 10:15:30.123456 US/Pacific	2025-12-18 10:15:30.123456789 US/Pacific

parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g

Lines changed: 9 additions & 2 deletions
@@ -2434,9 +2434,16 @@ primitiveType
     | KW_DOUBLE KW_PRECISION? -> TOK_DOUBLE
     | KW_DATE -> TOK_DATE
     | KW_DATETIME -> TOK_DATETIME
-    | KW_TIMESTAMP -> TOK_TIMESTAMP
     | KW_TIMESTAMPLOCALTZ -> TOK_TIMESTAMPLOCALTZ
-    | KW_TIMESTAMP KW_WITH KW_LOCAL KW_TIME KW_ZONE -> TOK_TIMESTAMPLOCALTZ
+    | KW_TIMESTAMP
+      (
+        KW_WITH KW_LOCAL KW_TIME KW_ZONE
+        (LPAREN p=Number RPAREN)?
+        -> ^(TOK_TIMESTAMPLOCALTZ $p?)
+      |
+        (LPAREN p=Number RPAREN)?
+        -> ^(TOK_TIMESTAMP $p?)
+      )
     // Uncomment to allow intervals as table column types
     //| KW_INTERVAL KW_YEAR KW_TO KW_MONTH -> TOK_INTERVAL_YEAR_MONTH
     //| KW_INTERVAL KW_DAY KW_TO KW_SECOND -> TOK_INTERVAL_DAY_TIME

ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Lines changed: 10 additions & 2 deletions
@@ -76,6 +76,7 @@
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -1160,9 +1161,10 @@ public static void validateColumns(List<FieldSchema> columns, List<FieldSchema>
         throw new HiveException("Duplicate column name " + colName
             + " in the table definition.");
       }
-      if (!icebergTable && VARIANT_TYPE_NAME.equalsIgnoreCase(col.getType())) {
+      if (!icebergTable && isUnsupportedInNonIceberg(col.getType())) {
         throw new HiveException(
-            "Column name " + colName + " cannot be of type 'variant' as it is not supported in non-Iceberg tables.");
+            "Column name " + colName + " cannot be of type '" + col.getType() + "' as it is not supported in "
+            + "non-Iceberg tables.");
       }
       colNames.add(colName);
     }
@@ -1392,4 +1394,10 @@ public List<VirtualColumn> getVirtualColumns() {

     return virtualColumns;
   }
+
+  private static boolean isUnsupportedInNonIceberg(String columnType) {
+    return VARIANT_TYPE_NAME.equalsIgnoreCase(columnType) ||
+        TypeInfoFactory.nanoTimestampTypeInfo.getQualifiedName().equalsIgnoreCase(columnType) ||
+        TypeInfoFactory.timestampNanoLocalTZTypeInfo.getQualifiedName().equalsIgnoreCase(columnType);
+  }
 }
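
Condensed, the new validation rule reads as the predicate below: nanosecond timestamps join 'variant' as column types that only Iceberg tables may declare. A sketch only; the wrapper class is illustrative and VARIANT_TYPE_NAME is assumed to hold Hive's variant type name as defined in Table.java.

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class NonIcebergColumnCheckSketch {
  private static final String VARIANT_TYPE_NAME = "variant";  // assumption mirroring Table.VARIANT_TYPE_NAME

  // True for column types that a non-Iceberg table must reject.
  static boolean isUnsupportedInNonIceberg(String columnType) {
    return VARIANT_TYPE_NAME.equalsIgnoreCase(columnType)
        || TypeInfoFactory.nanoTimestampTypeInfo.getQualifiedName().equalsIgnoreCase(columnType)
        || TypeInfoFactory.timestampNanoLocalTZTypeInfo.getQualifiedName().equalsIgnoreCase(columnType);
  }
}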

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java

Lines changed: 1 addition & 1 deletion
@@ -355,7 +355,7 @@ public static TypeInfo convertPrimitiveType(RelDataType rType) {
       } catch (HiveException e) {
         throw new RuntimeException(e);
       }
-      return TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone());
+      return TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone(), 6);
     case INTERVAL_YEAR:
     case INTERVAL_MONTH:
     case INTERVAL_YEAR_MONTH:
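
The factory now takes an explicit precision, and passing 6 (microseconds) preserves the previous behaviour for Calcite-derived types. A minimal sketch of the same call shape, using an arbitrary fixed zone in place of conf.getLocalTimeZone():

import java.time.ZoneId;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class TimestampTzPrecisionSketch {
  public static void main(String[] args) {
    // Zone plus an explicit precision; 6 keeps the microsecond default used above.
    TypeInfo microsTz = TypeInfoFactory.getTimestampTZTypeInfo(ZoneId.of("UTC"), 6);
    System.out.println(microsTz.getTypeName());
  }
}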
