|
232 | 232 | import static io.trino.plugin.hive.HiveTableProperties.CSV_QUOTE;
|
233 | 233 | import static io.trino.plugin.hive.HiveTableProperties.CSV_SEPARATOR;
|
234 | 234 | import static io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY;
|
| 235 | +import static io.trino.plugin.hive.HiveTableProperties.GROK_CUSTOM_PATTERNS; |
| 236 | +import static io.trino.plugin.hive.HiveTableProperties.GROK_INPUT_FORMAT; |
235 | 237 | import static io.trino.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY;
|
236 | 238 | import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS;
|
237 | 239 | import static io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP;
|
|
251 | 253 | import static io.trino.plugin.hive.HiveTableProperties.getExternalLocation;
|
252 | 254 | import static io.trino.plugin.hive.HiveTableProperties.getExtraProperties;
|
253 | 255 | import static io.trino.plugin.hive.HiveTableProperties.getFooterSkipCount;
|
| 256 | +import static io.trino.plugin.hive.HiveTableProperties.getGrokCustomPatterns; |
| 257 | +import static io.trino.plugin.hive.HiveTableProperties.getGrokInputFormat; |
254 | 258 | import static io.trino.plugin.hive.HiveTableProperties.getHeaderSkipCount;
|
255 | 259 | import static io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat;
|
256 | 260 | import static io.trino.plugin.hive.HiveTableProperties.getNullFormat;
|
@@ -387,6 +391,9 @@ public class HiveMetadata
|
387 | 391 | private static final String REGEX_KEY = "input.regex";
|
388 | 392 | private static final String REGEX_CASE_SENSITIVE_KEY = "input.regex.case.insensitive";
|
389 | 393 |
|
| 394 | + private static final String GROK_INPUT_FORMAT_KEY = "input.format"; |
| 395 | + private static final String GROK_CUSTOM_PATTERNS_KEY = "input.grokCustomPatterns"; |
| 396 | + |
390 | 397 | private static final String AUTO_PURGE_KEY = "auto.purge";
|
391 | 398 |
|
392 | 399 | public static final String MODIFYING_NON_TRANSACTIONAL_TABLE_MESSAGE = "Modifying Hive table rows is only supported for transactional tables";
|
@@ -776,6 +783,12 @@ else if (isTrinoView || isTrinoMaterializedView) {
|
776 | 783 | getSerdeProperty(table, REGEX_CASE_SENSITIVE_KEY)
|
777 | 784 | .ifPresent(regexCaseInsensitive -> properties.put(REGEX_CASE_INSENSITIVE, parseBoolean(regexCaseInsensitive)));
|
778 | 785 |
|
| 786 | + // GROK specific properties: report SerDe values under the table property names, not the SerDe keys (same convention as the REGEX properties above) |
| 787 | + getSerdeProperty(table, GROK_INPUT_FORMAT_KEY) |
| 788 | + .ifPresent(inputFormat -> properties.put(GROK_INPUT_FORMAT, inputFormat)); |
| 789 | + getSerdeProperty(table, GROK_CUSTOM_PATTERNS_KEY) |
| 790 | + .ifPresent(grokCustomPattern -> properties.put(GROK_CUSTOM_PATTERNS, grokCustomPattern)); |
| 791 | + |
779 | 792 | Optional<String> comment = Optional.ofNullable(table.getParameters().get(Table.TABLE_COMMENT));
|
780 | 793 |
|
781 | 794 | String autoPurgeProperty = table.getParameters().get(AUTO_PURGE_KEY);
|
@@ -1265,6 +1278,30 @@ else if (avroSchemaLiteral != null) {
|
1265 | 1278 | tableProperties.put(REGEX_CASE_SENSITIVE_KEY, String.valueOf(regexCaseInsensitive));
|
1266 | 1279 | });
|
1267 | 1280 |
|
| 1281 | + // GROK specific properties |
| 1282 | + getGrokInputFormat(tableMetadata.getProperties()) |
| 1283 | + .ifPresentOrElse( |
| 1284 | + inputFormat -> { |
| 1285 | + checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.GROK, GROK_INPUT_FORMAT); |
| 1286 | + // NOTE: the input format is deliberately NOT validated with java.util.regex.Pattern.compile() here. |
| 1287 | + // A Grok input format is not a plain Java regex: it may contain %{...} pattern references and |
| 1288 | + // named groups with underscores (e.g. (?<name_underscore>\\S+)), which Pattern.compile() rejects |
| 1289 | + // because Java named capturing groups cannot contain underscores. |
| 1290 | + // Compiling it as a regex would therefore reject legitimate Grok formats. |
| 1291 | + // TODO: validate the value with a Grok-aware compiler instead, and fail with INVALID_TABLE_PROPERTY on error. |
| 1292 | + tableProperties.put(GROK_INPUT_FORMAT_KEY, inputFormat); |
| 1293 | + }, |
| 1294 | + () -> { |
| 1295 | + if (hiveStorageFormat == HiveStorageFormat.GROK) { |
| 1296 | + throw new TrinoException(INVALID_TABLE_PROPERTY, format("GROK format requires the '%s' table property", GROK_INPUT_FORMAT)); |
| 1297 | + } |
| 1298 | + }); |
| 1299 | + getGrokCustomPatterns(tableMetadata.getProperties()) |
| 1300 | + .ifPresent(grokCustomPatterns -> { |
| 1301 | + checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.GROK, GROK_CUSTOM_PATTERNS); |
| 1302 | + tableProperties.put(GROK_CUSTOM_PATTERNS_KEY, grokCustomPatterns); |
| 1303 | + }); |
| 1304 | + |
1268 | 1305 | // Set bogus table stats to prevent Hive 2.x from gathering these stats at table creation.
|
1269 | 1306 | // These stats are not useful by themselves and can take a very long time to collect when creating an
|
1270 | 1307 | // external table over a large data set.
|
|
0 commit comments