13
13
*/
14
14
package io .trino .hive .formats .line .grok ;
15
15
16
+ import com .google .common .collect .ImmutableMap ;
16
17
import io .trino .hive .formats .line .grok .exception .GrokException ;
17
18
18
19
import java .text .ParseException ;
@@ -36,6 +37,12 @@ private Converter() {}
36
37
37
38
public static Map <String , IConverter <?>> converters = new HashMap <String , IConverter <?>>();
38
39
public static Locale locale = Locale .ENGLISH ;
40
+ private static final int MAX_SPEC_PARTS = 3 ; // field ID, datatype, and datatype arguments
41
+ private static final int FIELD_ID_AND_DATATYPE = 2 ;
42
+ private static final int ONLY_FIELD_ID = 1 ;
43
+ private static final int FIELD_ID_IDX = 0 ;
44
+ private static final int DATATYPE_IDX = 1 ;
45
+ private static final int DATATYPE_ARGS_IDX = 2 ;
39
46
40
47
static {
41
48
converters .put ("byte" , new ByteConverter ());
@@ -60,20 +67,48 @@ private static IConverter getConverter(String key)
60
67
return converter ;
61
68
}
62
69
63
- public static KeyValue convert (String key , Object value , Grok grok )
70
+ /**
71
+ * Convert a value according to the specified key pattern and Grok config
72
+ *
73
+ * The key can be of the form:
74
+ * fieldID
75
+ * fieldID:datatype
76
+ * fieldID:datatype:datatypeArgs
77
+ *
78
+ * fieldID - Identifier of field being parsed
79
+ * datatype - (Optional) target data type (e.g. int, string, date)
80
+ * datatypeArgs - (Optional) arguments to the data type (e.g. date format)
81
+ *
82
+ * @param key The pattern key with components field, data type, and args
83
+ * @param value The value to convert
84
+ * @param grok Grok instance containing pattern configs and conversion settings (e.g. strict mode)
85
+ * @return ImmutableMap containing the field ID and its converted value
86
+ * @throws GrokException If conversion fails or if pattern/datatype is invalid
87
+ *
88
+ * converting a timestamp: convert("timestamp:date:yyyy-MM-dd", "2023-12-25", grok)
89
+ * timestamp is the field ID, date is the data type, and yyyy-MM-dd is the date format argument
90
+ *
91
+ * converting int: convert("status:int", "200", grok)
92
+ * status is the field ID, int is the data type
93
+ *
94
+ * using default data type from pattern: convert("message", "Hello World", grok)
95
+ * message is the field ID, no data type is specified, so the default data type from the pattern is used
96
+ *
97
+ */
98
+ public static ImmutableMap <String , Object > convert (String key , Object value , Grok grok )
64
99
throws GrokException
65
100
{
66
- String [] spec = key .split (";|:" , 3 );
101
+ String [] spec = key .split (";|:" , MAX_SPEC_PARTS );
67
102
try {
68
103
// process situations with field id [and datatype]
69
- if (spec .length <= 2 ) {
104
+ if (spec .length <= FIELD_ID_AND_DATATYPE ) {
70
105
String pattern = grok .getGrokPatternPatterns ().get (key ); // actual pattern name
71
106
String defaultDataType = grok .getGrokPatternDefaultDatatype ().get (pattern ); // default datatype of the pattern
72
107
// process Date datatype with no format arguments
73
108
// 1. not in strict mode && no assigned data type && the default data type is datetime or date
74
109
// 2. assigned data type is datetime or date && no date format argument
75
- if ((!grok .getStrictMode () && spec .length == 1 && defaultDataType != null && (defaultDataType .equals ("datetime" ) || defaultDataType .equals ("date" )))
76
- || (spec .length == 2 && (spec [1 ].equals ("datetime" ) || spec [1 ].equals ("date" )))) {
110
+ if ((!grok .getStrictMode () && spec .length == ONLY_FIELD_ID && defaultDataType != null && (defaultDataType .equals ("datetime" ) || defaultDataType .equals ("date" )))
111
+ || (spec .length == FIELD_ID_AND_DATATYPE && (spec [DATATYPE_IDX ].equals ("datetime" ) || spec [DATATYPE_IDX ].equals ("date" )))) {
77
112
// check whether to get the date format already when parsing the previous records
78
113
String dateFormat = grok .getGrokPatternPatterns ().get (key + "dateformat" );
79
114
Date date = null ;
@@ -100,30 +135,30 @@ public static KeyValue convert(String key, Object value, Grok grok)
100
135
}
101
136
if (date != null ) {
102
137
// if parse successfully, return date object
103
- return new KeyValue (spec [0 ], date );
138
+ return ImmutableMap . of (spec [FIELD_ID_IDX ], date );
104
139
}
105
140
else {
106
141
// if failed, return string object
107
- return new KeyValue (spec [0 ], String .valueOf (value ));
142
+ return ImmutableMap . of (spec [FIELD_ID_IDX ], String .valueOf (value ));
108
143
}
109
144
}
110
- else if (spec .length == 1 ) {
145
+ else if (spec .length == ONLY_FIELD_ID ) {
111
146
if (grok .getStrictMode ()) {
112
147
// if in strict mode, never do automatic data type conversion
113
148
defaultDataType = null ;
114
149
}
115
150
// process situations with only field id (check default datatype, except date and datetime)
116
- return new KeyValue (spec [0 ],
151
+ return ImmutableMap . of (spec [FIELD_ID_IDX ],
117
152
defaultDataType == null ? String .valueOf (value ) : getConverter (defaultDataType ).convert (String .valueOf (value )));
118
153
}
119
154
else {
120
155
// process situations with field id and datatype (except date and datetime)
121
- return new KeyValue (spec [0 ], getConverter (spec [1 ]).convert (String .valueOf (value )));
156
+ return ImmutableMap . of (spec [FIELD_ID_IDX ], getConverter (spec [DATATYPE_IDX ]).convert (String .valueOf (value )));
122
157
}
123
158
}
124
- else if (spec .length == 3 ) {
159
+ else if (spec .length == MAX_SPEC_PARTS ) {
125
160
// process situations with field id, datatype and datatype arguments
126
- return new KeyValue (spec [0 ], getConverter (spec [1 ]).convert (String .valueOf (value ), spec [2 ]));
161
+ return ImmutableMap . of (spec [FIELD_ID_IDX ], getConverter (spec [DATATYPE_IDX ]).convert (String .valueOf (value ), spec [DATATYPE_ARGS_IDX ]));
127
162
}
128
163
else {
129
164
throw new GrokException ("Unsupported spec : " + key );
@@ -132,11 +167,11 @@ else if (spec.length == 3) {
132
167
catch (Exception e ) {
133
168
if (!grok .getStrictMode ()) {
134
169
// if not in strict mode, try to convert everything to string when meeting a data type conversion error
135
- return new KeyValue (spec [0 ], String .valueOf (value ));
170
+ return ImmutableMap . of (spec [0 ], String .valueOf (value ));
136
171
}
137
172
else {
138
173
// if in strict mode, throw exception when meeting a data type conversion error
139
- throw new GrokException ("Unable to finish data type conversion of " + spec [0 ] + ":" + e .getMessage ());
174
+ throw new GrokException ("Unable to finish data type conversion of " + spec [FIELD_ID_IDX ] + ":" + e .getMessage ());
140
175
}
141
176
}
142
177
}
0 commit comments