Skip to content

Commit 5a47548

Browse files
committed
Initial commit to support nested field pruning
1 parent 947c3f3 commit 5a47548

28 files changed

Lines changed: 927 additions & 38 deletions

presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ public class HiveClientConfig
9999
private boolean useParquetColumnNames;
100100
private boolean parquetOptimizedReaderEnabled;
101101
private boolean parquetPredicatePushdownEnabled;
102+
private boolean parquetNestedFieldsProjectionPushdownEnabled;
102103

103104
private boolean assumeCanonicalPartitionKeys;
104105

@@ -675,6 +676,18 @@ public HiveClientConfig setParquetPredicatePushdownEnabled(boolean parquetPredic
675676
return this;
676677
}
677678

679+
public boolean isParquetNestedFieldsProjectionPushdownEnabled()
680+
{
681+
return parquetNestedFieldsProjectionPushdownEnabled;
682+
}
683+
684+
@Config("hive.parquet-nested-fields-projection-pushdown.enabled")
685+
public HiveClientConfig setParquetNestedFieldsProjectionPushdownEnabled(boolean parquetNestedFieldsProjectionPushdownEnabled)
686+
{
687+
this.parquetNestedFieldsProjectionPushdownEnabled = parquetNestedFieldsProjectionPushdownEnabled;
688+
return this;
689+
}
690+
678691
@Deprecated
679692
public boolean isParquetOptimizedReaderEnabled()
680693
{

presto-hive/src/main/java/com/facebook/presto/hive/HiveColumnHandle.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import com.facebook.presto.spi.ColumnHandle;
1717
import com.facebook.presto.spi.ColumnMetadata;
18+
import com.facebook.presto.spi.predicate.FieldSet;
1819
import com.facebook.presto.spi.type.TypeManager;
1920
import com.facebook.presto.spi.type.TypeSignature;
2021
import com.fasterxml.jackson.annotation.JsonCreator;
@@ -56,22 +57,30 @@ public enum ColumnType
5657
}
5758

5859
private final String name;
60+
private final Optional<FieldSet> fieldSet;
5961
private final HiveType hiveType;
6062
private final TypeSignature typeName;
6163
private final int hiveColumnIndex;
6264
private final ColumnType columnType;
6365
private final Optional<String> comment;
6466

67+
/**
 * Creates a column handle without a nested-field selection. Delegates to the
 * full constructor, passing {@code Optional.empty()} as the field set.
 */
public HiveColumnHandle(String name, HiveType hiveType, TypeSignature typeSignature, int hiveColumnIndex, ColumnType columnType, Optional<String> comment)
{
    this(
            name,
            Optional.empty(),
            hiveType,
            typeSignature,
            hiveColumnIndex,
            columnType,
            comment);
}
71+
6572
@JsonCreator
6673
public HiveColumnHandle(
6774
@JsonProperty("name") String name,
75+
@JsonProperty("fieldSet") Optional<FieldSet> fieldSet,
6876
@JsonProperty("hiveType") HiveType hiveType,
6977
@JsonProperty("typeSignature") TypeSignature typeSignature,
7078
@JsonProperty("hiveColumnIndex") int hiveColumnIndex,
7179
@JsonProperty("columnType") ColumnType columnType,
7280
@JsonProperty("comment") Optional<String> comment)
7381
{
7482
this.name = requireNonNull(name, "name is null");
83+
this.fieldSet = requireNonNull(fieldSet, "fieldSet is null");
7584
checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED, "hiveColumnIndex is negative");
7685
this.hiveColumnIndex = hiveColumnIndex;
7786
this.hiveType = requireNonNull(hiveType, "hiveType is null");
@@ -86,6 +95,12 @@ public String getName()
8695
return name;
8796
}
8897

98+
@JsonProperty
99+
public Optional<FieldSet> getFieldSet()
100+
{
101+
return fieldSet;
102+
}
103+
89104
@JsonProperty
90105
public HiveType getHiveType()
91106
{
@@ -134,7 +149,7 @@ public ColumnType getColumnType()
134149
@Override
135150
public int hashCode()
136151
{
137-
return Objects.hash(name, hiveColumnIndex, hiveType, columnType, comment);
152+
return Objects.hash(name, fieldSet, hiveColumnIndex, hiveType, columnType, comment);
138153
}
139154

140155
@Override
@@ -148,6 +163,7 @@ public boolean equals(Object obj)
148163
}
149164
HiveColumnHandle other = (HiveColumnHandle) obj;
150165
return Objects.equals(this.name, other.name) &&
166+
Objects.equals(this.fieldSet, other.fieldSet) &&
151167
Objects.equals(this.hiveColumnIndex, other.hiveColumnIndex) &&
152168
Objects.equals(this.hiveType, other.hiveType) &&
153169
Objects.equals(this.columnType, other.columnType) &&
@@ -159,6 +175,7 @@ public String toString()
159175
{
160176
return toStringHelper(this)
161177
.add("name", name)
178+
.add("fieldSet", fieldSet.orElse(null))
162179
.add("hiveType", hiveType)
163180
.add("hiveColumnIndex", hiveColumnIndex)
164181
.add("columnType", columnType)
@@ -202,4 +219,9 @@ public static boolean isBucketColumnHandle(HiveColumnHandle column)
202219
{
203220
return column.getHiveColumnIndex() == BUCKET_COLUMN_INDEX;
204221
}
222+
223+
public static HiveColumnHandle withFieldSet(HiveColumnHandle column, Optional<FieldSet> fieldSet)
224+
{
225+
return new HiveColumnHandle(column.name, fieldSet, column.getHiveType(), column.getTypeSignature(), column.getHiveColumnIndex(), column.getColumnType(), column.getComment());
226+
}
205227
}

presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED;
108108
import static com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME;
109109
import static com.facebook.presto.hive.HiveColumnHandle.updateRowIdHandle;
110+
import static com.facebook.presto.hive.HiveColumnHandle.withFieldSet;
110111
import static com.facebook.presto.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH;
111112
import static com.facebook.presto.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED;
112113
import static com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT;
@@ -1302,22 +1303,48 @@ public boolean supportsMetadataDelete(ConnectorSession session, ConnectorTableHa
13021303
public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> desiredColumns)
13031304
{
13041305
HiveTableHandle handle = (HiveTableHandle) tableHandle;
1306+
System.err.println("+++++++HiveMetadata::getTableLayouts+++++++");
1307+
System.err.println(constraint.getFieldSets());
13051308
HivePartitionResult hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint);
13061309

1310+
HiveTableLayoutHandle layoutHandle = new HiveTableLayoutHandle(
1311+
handle.getSchemaTableName(),
1312+
ImmutableList.copyOf(hivePartitionResult.getPartitionColumns()),
1313+
getPartitionsAsList(hivePartitionResult),
1314+
hivePartitionResult.getCompactEffectivePredicate(),
1315+
hivePartitionResult.getEnforcedConstraint(),
1316+
hivePartitionResult.getBucketHandle(),
1317+
hivePartitionResult.getBucketFilter());
1318+
1319+
if (constraint.getFieldSets().isPresent()) {
1320+
return ImmutableList.of(new ConnectorTableLayoutResult(
1321+
pruneColumnFields(layoutHandle, constraint),
1322+
constraint.getSummary()));
1323+
}
1324+
13071325
return ImmutableList.of(new ConnectorTableLayoutResult(
1308-
getTableLayout(
1309-
session,
1310-
new HiveTableLayoutHandle(
1311-
handle.getSchemaTableName(),
1312-
ImmutableList.copyOf(hivePartitionResult.getPartitionColumns()),
1313-
getPartitionsAsList(hivePartitionResult),
1314-
hivePartitionResult.getCompactEffectivePredicate(),
1315-
hivePartitionResult.getEnforcedConstraint(),
1316-
hivePartitionResult.getBucketHandle(),
1317-
hivePartitionResult.getBucketFilter())),
1326+
getTableLayout(session, layoutHandle),
13181327
hivePartitionResult.getUnenforcedConstraint()));
13191328
}
13201329

1330+
private ConnectorTableLayout pruneColumnFields(HiveTableLayoutHandle layoutHandle, Constraint<ColumnHandle> constraint)
1331+
{
1332+
Optional<List<ColumnHandle>> columns = constraint.getFieldSets()
1333+
.map(fieldsSets -> fieldsSets.stream()
1334+
.filter(entry -> !((HiveColumnHandle) entry.getKey()).getFieldSet().isPresent())
1335+
.map(entry -> withFieldSet((HiveColumnHandle) entry.getKey(), Optional.of(entry.getValue())))
1336+
.collect(toImmutableList()));
1337+
1338+
return new ConnectorTableLayout(
1339+
layoutHandle,
1340+
columns,
1341+
TupleDomain.all(),
1342+
Optional.empty(),
1343+
Optional.empty(),
1344+
Optional.empty(),
1345+
emptyList());
1346+
}
1347+
13211348
@Override
13221349
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle)
13231350
{

presto-hive/src/main/java/com/facebook/presto/hive/HivePageSourceProvider.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regular
356356
}
357357
return new HiveColumnHandle(
358358
columnHandle.getName(),
359+
columnHandle.getFieldSet(),
359360
columnMapping.getCoercionFrom().get(),
360361
columnMapping.getCoercionFrom().get().getTypeSignature(),
361362
columnHandle.getHiveColumnIndex(),

presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ public final class HiveSessionProperties
4949
private static final String RESPECT_TABLE_FORMAT = "respect_table_format";
5050
private static final String PARQUET_PREDICATE_PUSHDOWN_ENABLED = "parquet_predicate_pushdown_enabled";
5151
private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled";
52+
private static final String PARQUET_NESTED_FIELDS_PROJECTION_PUSHDOWN_READER_ENABLED = "parquet_nested_fields_projection_pushdown_enabled";
5253
private static final String MAX_SPLIT_SIZE = "max_split_size";
5354
private static final String MAX_INITIAL_SPLIT_SIZE = "max_initial_split_size";
5455
public static final String RCFILE_OPTIMIZED_WRITER_ENABLED = "rcfile_optimized_writer_enabled";
@@ -153,6 +154,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
153154
"Experimental: Parquet: Enable predicate pushdown for Parquet",
154155
hiveClientConfig.isParquetPredicatePushdownEnabled(),
155156
false),
157+
booleanSessionProperty(
158+
PARQUET_NESTED_FIELDS_PROJECTION_PUSHDOWN_READER_ENABLED,
159+
"Experimental: Parquet: Enable nested fields projection pushdown for Parquet",
160+
hiveClientConfig.isParquetNestedFieldsProjectionPushdownEnabled(),
161+
false),
156162
dataSizeSessionProperty(
157163
MAX_SPLIT_SIZE,
158164
"Max split size",
@@ -299,6 +305,11 @@ public static boolean isParquetPredicatePushdownEnabled(ConnectorSession session
299305
return session.getProperty(PARQUET_PREDICATE_PUSHDOWN_ENABLED, Boolean.class);
300306
}
301307

308+
public static boolean isParquetNestedFieldsProjectionPushdownEnabled(ConnectorSession session)
309+
{
310+
return session.getProperty(PARQUET_NESTED_FIELDS_PROJECTION_PUSHDOWN_READER_ENABLED, Boolean.class);
311+
}
312+
302313
public static DataSize getMaxSplitSize(ConnectorSession session)
303314
{
304315
return session.getProperty(MAX_SPLIT_SIZE, DataSize.class);

presto-hive/src/main/java/com/facebook/presto/hive/orc/OrcPageSourceFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColu
261261
physicalOrdinal = nextMissingColumnIndex;
262262
nextMissingColumnIndex++;
263263
}
264-
physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment()));
264+
physicalColumns.add(new HiveColumnHandle(column.getName(), column.getFieldSet(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment()));
265265
}
266266
return physicalColumns.build();
267267
}

presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,14 @@
5555
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;
5656
import static com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT;
5757
import static com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA;
58+
import static com.facebook.presto.hive.HiveSessionProperties.isParquetNestedFieldsProjectionPushdownEnabled;
5859
import static com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled;
5960
import static com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled;
6061
import static com.facebook.presto.hive.HiveUtil.getDeserializerClassName;
6162
import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource;
6263
import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getColumnIO;
6364
import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getDescriptors;
64-
import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType;
65+
import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getPrunedParquetType;
6566
import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate;
6667
import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.getParquetTupleDomain;
6768
import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches;
@@ -181,7 +182,7 @@ public static ParquetPageSource createParquetPageSource(
181182

182183
List<parquet.schema.Type> fields = columns.stream()
183184
.filter(column -> column.getColumnType() == REGULAR)
184-
.map(column -> getParquetType(column, fileSchema, useParquetColumnNames))
185+
.map(column -> getPrunedParquetType(column, fileSchema, useParquetColumnNames, isParquetNestedFieldsProjectionPushdownEnabled(session)))
185186
.filter(Objects::nonNull)
186187
.collect(toList());
187188

presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTypeUtils.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import com.facebook.presto.spi.type.TimestampType;
2525
import com.facebook.presto.spi.type.Type;
2626
import com.facebook.presto.spi.type.VarcharType;
27+
import com.google.common.collect.ImmutableMap;
28+
import com.google.common.collect.ImmutableSet;
2729
import parquet.column.Encoding;
2830
import parquet.io.ColumnIO;
2931
import parquet.io.ColumnIOFactory;
@@ -37,12 +39,15 @@
3739

3840
import java.util.Arrays;
3941
import java.util.HashMap;
42+
import java.util.HashSet;
4043
import java.util.List;
4144
import java.util.Map;
4245
import java.util.Optional;
46+
import java.util.Set;
4347

4448
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
4549
import static com.google.common.base.Preconditions.checkArgument;
50+
import static com.google.common.collect.ImmutableList.toImmutableList;
4651
import static java.util.Optional.empty;
4752
import static parquet.schema.OriginalType.DECIMAL;
4853
import static parquet.schema.Type.Repetition.REPEATED;
@@ -194,6 +199,53 @@ public static int getFieldIndex(MessageType fileSchema, String name)
194199
}
195200
}
196201

202+
public static parquet.schema.Type getPrunedParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames, boolean pruneNestedFields)
203+
{
204+
parquet.schema.Type originalType = getParquetType(column, messageType, useParquetColumnNames);
205+
if (pruneNestedFields && column.getFieldSet().isPresent()) {
206+
return pruneParquetType(originalType, column.getFieldSet().get().getFields());
207+
}
208+
209+
return originalType;
210+
}
211+
212+
private static parquet.schema.Type pruneParquetType(parquet.schema.Type type, Set<String> requiredFields)
213+
{
214+
if (requiredFields.isEmpty()) {
215+
return type;
216+
}
217+
218+
if (type.isPrimitive()) {
219+
return type;
220+
}
221+
222+
Map<String, Set<String>> fields = groupFields(requiredFields);
223+
224+
List<parquet.schema.Type> newFields = fields.entrySet().stream()
225+
.map(entry -> pruneParquetType(type.asGroupType().getType(entry.getKey()), entry.getValue()))
226+
.collect(toImmutableList());
227+
228+
return type.asGroupType().withNewFields(newFields);
229+
}
230+
231+
private static Map<String, Set<String>> groupFields(Set<String> requiredFields)
232+
{
233+
Map<String, Set<String>> fields = new HashMap<>();
234+
for (String field : requiredFields) {
235+
String[] path = field.split("\\.", 2);
236+
String fieldName = path[0];
237+
Set<String> nestedField = path.length == 1 ? ImmutableSet.of() : ImmutableSet.of(path[1]);
238+
if (fields.containsKey(fieldName)) {
239+
fields.get(fieldName).addAll(nestedField);
240+
}
241+
else {
242+
fields.put(fieldName, new HashSet<>(nestedField));
243+
}
244+
}
245+
246+
return ImmutableMap.copyOf(fields);
247+
}
248+
197249
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
198250
{
199251
if (useParquetColumnNames) {

presto-main/src/main/java/com/facebook/presto/server/ServerMainModule.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@
140140
import com.facebook.presto.sql.planner.LocalExecutionPlanner;
141141
import com.facebook.presto.sql.planner.NodePartitioningManager;
142142
import com.facebook.presto.sql.planner.PlanOptimizers;
143+
import com.facebook.presto.sql.planner.Symbol;
143144
import com.facebook.presto.sql.tree.Expression;
144145
import com.facebook.presto.sql.tree.FunctionCall;
145146
import com.facebook.presto.transaction.ForTransactionManager;
@@ -434,6 +435,8 @@ protected void setup(Binder binder)
434435
jsonBinder(binder).addSerializerBinding(Expression.class).to(ExpressionSerializer.class);
435436
jsonBinder(binder).addDeserializerBinding(Expression.class).to(ExpressionDeserializer.class);
436437
jsonBinder(binder).addDeserializerBinding(FunctionCall.class).to(FunctionCallDeserializer.class);
438+
jsonBinder(binder).addKeySerializerBinding(Symbol.class).to(Symbol.SymbolKeySerializer.class);
439+
jsonBinder(binder).addKeyDeserializerBinding(Symbol.class).to(Symbol.SymbolKeyDeserializer.class);
437440

438441
// query monitor
439442
configBinder(binder).bindConfig(QueryMonitorConfig.class);

presto-main/src/main/java/com/facebook/presto/sql/planner/LookupSymbolResolver.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
import java.util.Map;
2121

2222
import static com.google.common.base.Preconditions.checkArgument;
23+
import static com.google.common.collect.ImmutableMap.toImmutableMap;
2324
import static java.util.Objects.requireNonNull;
2425

2526
public class LookupSymbolResolver
2627
implements SymbolResolver
2728
{
29+
private final Map<String, Symbol> assignmentSymbolLookup;
2830
private final Map<Symbol, ColumnHandle> assignments;
2931
private final Map<ColumnHandle, NullableValue> bindings;
3032

@@ -33,18 +35,22 @@ public LookupSymbolResolver(Map<Symbol, ColumnHandle> assignments, Map<ColumnHan
3335
requireNonNull(assignments, "assignments is null");
3436
requireNonNull(bindings, "bindings is null");
3537

38+
this.assignmentSymbolLookup = assignments.keySet().stream()
39+
.collect(toImmutableMap(Symbol::getName, symbol -> symbol));
3640
this.assignments = ImmutableMap.copyOf(assignments);
3741
this.bindings = ImmutableMap.copyOf(bindings);
3842
}
3943

4044
@Override
4145
public Object getValue(Symbol symbol)
4246
{
43-
ColumnHandle column = assignments.get(symbol);
47+
Symbol assignmentSymbol = assignmentSymbolLookup.get(symbol.getName());
48+
checkArgument(assignmentSymbol != null, "Missing column assignment for %s", symbol.getName());
49+
ColumnHandle column = assignments.get(assignmentSymbol);
4450
checkArgument(column != null, "Missing column assignment for %s", symbol);
4551

4652
if (!bindings.containsKey(column)) {
47-
return symbol.toSymbolReference();
53+
return assignmentSymbol.toSymbolReference();
4854
}
4955

5056
return bindings.get(column).getValue();

0 commit comments

Comments
 (0)