Add hive configs for supported read and write formats #25147

Open · wants to merge 1 commit into base: master

@@ -199,7 +199,8 @@ public S3SelectTestHelper(String host,
config.getRecursiveDirWalkerEnabled(),
new ConfigBasedCacheQuotaRequirementProvider(cacheConfig),
new HiveEncryptionInformationProvider(ImmutableSet.of()),
new HivePartitionSkippabilityChecker());
new HivePartitionSkippabilityChecker(),
ImmutableList.of());
pageSourceProvider = new HivePageSourceProvider(
config,
hdfsEnvironment,
@@ -222,6 +222,8 @@ public class HiveClientConfig
private int parquetQuickStatsMaxConcurrentCalls = 500;
private int quickStatsMaxConcurrentCalls = 100;
private boolean legacyTimestampBucketing;
private List<String> readFormats = ImmutableList.of();
private String writeFormats;

@Min(0)
public int getMaxInitialSplits()
@@ -464,7 +466,7 @@ public List<String> getResourceConfigFiles()
@Config("hive.config.resources")
public HiveClientConfig setResourceConfigFiles(String files)
{
this.resourceConfigFiles = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(files);
this.resourceConfigFiles = SPLITTER.splitToList(files);
return this;
}

@@ -1831,4 +1833,30 @@ public HiveClientConfig setLegacyTimestampBucketing(boolean legacyTimestampBucke
this.legacyTimestampBucketing = legacyTimestampBucketing;
return this;
}

@Config("hive.read-formats")
@ConfigDescription("File formats supported for read operation.")
public HiveClientConfig setReadFormats(String formats)
{
this.readFormats = SPLITTER.splitToList(formats);
return this;
}

public List<String> getReadFormats()
{
return readFormats;
}

@Config("hive.write-formats")
@ConfigDescription("File formats supported for write operation.")
public HiveClientConfig setWriteFormats(String formats)
{
this.writeFormats = formats;
return this;
}

public String getWriteFormats()
{
return writeFormats;
}
}
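For reference, a minimal sketch of how the two new properties could look in a Hive catalog properties file; the property names come from the @Config annotations above, and the format names are assumed to match HiveStorageFormat enum values (e.g. ORC, PARQUET, DWRF):

    hive.read-formats=ORC,PARQUET,DWRF
    hive.write-formats=ORC,PARQUET

Leaving either property unset (the default) effectively disables the corresponding check, since both call sites skip validation when the configured list is empty.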
@@ -213,6 +213,7 @@
import static com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableSchema;
import static com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableStorageFormat;
import static com.facebook.presto.hive.HiveSessionProperties.getVirtualBucketCount;
import static com.facebook.presto.hive.HiveSessionProperties.getWriteFormats;
import static com.facebook.presto.hive.HiveSessionProperties.isBucketExecutionEnabled;
import static com.facebook.presto.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite;
import static com.facebook.presto.hive.HiveSessionProperties.isCreateEmptyBucketFiles;
@@ -1913,6 +1914,12 @@ private HiveInsertTableHandle beginInsertInternal(ConnectorSession session, Conn
SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
.orElseThrow(() -> new TableNotFoundException(tableName));
HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
List<String> writeFormats = getWriteFormats(session);
if (!writeFormats.isEmpty() && !writeFormats.contains(tableStorageFormat.name())) {
throw new PrestoException(NOT_SUPPORTED,
format("File format %s not supported for write operation.", tableStorageFormat));
}

tableWritabilityChecker.checkTableWritable(table);

@@ -1933,7 +1940,6 @@ private HiveInsertTableHandle beginInsertInternal(ConnectorSession session, Conn
.filter(columnHandle -> !columnHandle.isHidden())
.collect(toList());

HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
LocationHandle locationHandle;
boolean isTemporaryTable = table.getTableType().equals(TEMPORARY_TABLE);
boolean tempPathRequired = isTempPathRequired(
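A hedged illustration of the new guard in beginInsertInternal, assuming hive.write-formats=ORC,PARQUET and a hypothetical table stored as AVRO:

    INSERT INTO hive.tpch.avro_events SELECT * FROM hive.tpch.staging_events;
    -- expected to fail with NOT_SUPPORTED: File format AVRO not supported for write operation.

Because the check runs before checkTableWritable and before any location or writer is set up, the insert fails fast without modifying the table.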
@@ -17,6 +17,7 @@
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.session.PropertyMetadata;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
@@ -134,6 +135,7 @@ public final class HiveSessionProperties
public static final String DYNAMIC_SPLIT_SIZES_ENABLED = "dynamic_split_sizes_enabled";
public static final String SKIP_EMPTY_FILES = "skip_empty_files";
public static final String LEGACY_TIMESTAMP_BUCKETING = "legacy_timestamp_bucketing";
public static final String WRITE_FORMATS = "write_formats";

public static final String NATIVE_STATS_BASED_FILTER_REORDER_DISABLED = "native_stats_based_filter_reorder_disabled";

@@ -660,7 +662,12 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
NATIVE_STATS_BASED_FILTER_REORDER_DISABLED,
"Native Execution only. Disable stats based filter reordering.",
false,
true));
true),
stringProperty(
WRITE_FORMATS,
"File formats supported for write operation.",
hiveClientConfig.getWriteFormats(),
false));
}

public List<PropertyMetadata<?>> getSessionProperties()
@@ -1148,4 +1155,10 @@ public static boolean isLegacyTimestampBucketing(ConnectorSession session)
{
return session.getProperty(LEGACY_TIMESTAMP_BUCKETING, Boolean.class);
}

public static List<String> getWriteFormats(ConnectorSession session)
{
String formats = session.getProperty(WRITE_FORMATS, String.class);
return (formats == null) ? ImmutableList.of() : Splitter.on(',').trimResults().omitEmptyStrings().splitToList(formats);
}
}
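Since write_formats is registered as a string session property seeded from hive.write-formats, the allowed write formats can presumably also be narrowed per session; a sketch, assuming the catalog is named hive:

    SET SESSION hive.write_formats = 'ORC,PARQUET';

getWriteFormats splits the value on commas, trims whitespace, and drops empty entries, so an unset or empty property leaves the write check disabled.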
@@ -151,6 +151,7 @@ public class HiveSplitManager
private final CacheQuotaRequirementProvider cacheQuotaRequirementProvider;
private final HiveEncryptionInformationProvider encryptionInformationProvider;
private final PartitionSkippabilityChecker partitionSkippabilityChecker;
private final List<String> readFormats;

@Inject
public HiveSplitManager(
@@ -181,7 +182,8 @@ public HiveSplitManager(
hiveClientConfig.getRecursiveDirWalkerEnabled(),
cacheQuotaRequirementProvider,
encryptionInformationProvider,
partitionSkippabilityChecker);
partitionSkippabilityChecker,
hiveClientConfig.getReadFormats());
}

public HiveSplitManager(
@@ -200,7 +202,8 @@ public HiveSplitManager(
boolean recursiveDfsWalkerEnabled,
CacheQuotaRequirementProvider cacheQuotaRequirementProvider,
HiveEncryptionInformationProvider encryptionInformationProvider,
PartitionSkippabilityChecker partitionSkippabilityChecker)
PartitionSkippabilityChecker partitionSkippabilityChecker,
List<String> readFormats)
{
this.hiveTransactionManager = requireNonNull(hiveTransactionManager, "hiveTransactionManager is null");
this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
@@ -219,6 +222,7 @@ public HiveSplitManager(
this.cacheQuotaRequirementProvider = requireNonNull(cacheQuotaRequirementProvider, "cacheQuotaRequirementProvider is null");
this.encryptionInformationProvider = requireNonNull(encryptionInformationProvider, "encryptionInformationProvider is null");
this.partitionSkippabilityChecker = requireNonNull(partitionSkippabilityChecker, "partitionSkippabilityChecker is null");
this.readFormats = requireNonNull(readFormats, "readFormats is null");
}

@Override
@@ -250,6 +254,15 @@ public ConnectorSplitSource getSplits(
session.getRuntimeStats());
Table table = layout.getTable(metastore, metastoreContext);

if (!readFormats.isEmpty()) {
StorageFormat storageFormat = table.getStorage().getStorageFormat();
Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
if (hiveStorageFormat.isPresent() && !readFormats.contains(hiveStorageFormat.get().name())) {
throw new HiveNotReadableException(tableName, Optional.empty(),
format("File format %s not supported for read operation.", hiveStorageFormat.get()));
}
}

if (!isOfflineDataDebugModeEnabled(session)) {
// verify table is not marked as non-readable
String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
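On the read side, a hedged sketch of what a query against a disallowed format would hit once hive.read-formats is set (table name hypothetical):

    SELECT count(*) FROM hive.tpch.avro_events;
    -- expected to fail with HiveNotReadableException: File format AVRO not supported for read operation.

Note the check only fires when getHiveStorageFormat can map the table's StorageFormat to a HiveStorageFormat; if it returns Optional.empty(), split generation proceeds as before.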
@@ -1067,7 +1067,8 @@ protected final void setup(String databaseName, HiveClientConfig hiveClientConfi
false,
new ConfigBasedCacheQuotaRequirementProvider(cacheConfig),
encryptionInformationProvider,
new HivePartitionSkippabilityChecker());
new HivePartitionSkippabilityChecker(),
ImmutableList.of());
pageSinkProvider = new HivePageSinkProvider(
getDefaultHiveFileWriterFactories(hiveClientConfig, metastoreClientConfig),
hdfsEnvironment,
@@ -247,7 +247,8 @@ protected void setup(String host, int port, String databaseName, BiFunction<Hive
config.getRecursiveDirWalkerEnabled(),
new ConfigBasedCacheQuotaRequirementProvider(cacheConfig),
new HiveEncryptionInformationProvider(ImmutableSet.of()),
new HivePartitionSkippabilityChecker());
new HivePartitionSkippabilityChecker(),
ImmutableList.of());
pageSinkProvider = new HivePageSinkProvider(
getDefaultHiveFileWriterFactories(config, metastoreClientConfig),
hdfsEnvironment,
@@ -166,7 +166,9 @@ public void testDefaults()
.setMaxConcurrentParquetQuickStatsCalls(500)
.setCteVirtualBucketCount(128)
.setSkipEmptyFilesEnabled(false)
.setLegacyTimestampBucketing(false));
.setLegacyTimestampBucketing(false)
.setReadFormats("")
.setWriteFormats(null));
}

@Test
@@ -293,6 +295,8 @@ public void testExplicitPropertyMappings()
.put("hive.cte-virtual-bucket-count", "256")
.put("hive.skip-empty-files", "true")
.put("hive.legacy-timestamp-bucketing", "true")
.put("hive.read-formats", "DWRF,ORC,PARQUET")
.put("hive.write-formats", "DWRF,PARQUET")
.build();

HiveClientConfig expected = new HiveClientConfig()
@@ -414,7 +418,9 @@ public void testExplicitPropertyMappings()
.setMaxConcurrentQuickStatsCalls(101)
.setSkipEmptyFilesEnabled(true)
.setCteVirtualBucketCount(256)
.setLegacyTimestampBucketing(true);
.setLegacyTimestampBucketing(true)
.setReadFormats("DWRF,ORC,PARQUET")
.setWriteFormats("DWRF,PARQUET");

ConfigAssertions.assertFullMapping(properties, expected);
}
@@ -552,7 +552,8 @@ private void assertRedundantColumnDomains(Range predicateRange, PartitionStatist
false,
new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()),
new HiveEncryptionInformationProvider(ImmutableList.of()),
new HivePartitionSkippabilityChecker());
new HivePartitionSkippabilityChecker(),
ImmutableList.of());

HiveColumnHandle partitionColumn = new HiveColumnHandle(
"ds",
@@ -700,7 +701,8 @@ public void testEncryptionInformation()
false,
new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()),
encryptionInformationProvider,
new HivePartitionSkippabilityChecker());
new HivePartitionSkippabilityChecker(),
ImmutableList.of());

HiveColumnHandle partitionColumn = new HiveColumnHandle(
"ds",